Example #1
    def testRegister(self):
        from mars.graph import DAG

        fake_result = np.random.rand(10, 10)
        fake_size = (fake_result.nbytes * 2, fake_result.nbytes * 2)

        def fake_execute(ctx, op):
            ctx[op.outputs[0].key] = fake_result

        def fake_estimate(ctx, op):
            ctx[op.outputs[0].key] = fake_size

        register(FakeOperand, fake_execute, fake_estimate)

        graph = DAG()
        chunk = FakeOperand().new_chunk(None, shape=(10, 10))
        graph.add_node(chunk.data)

        executor = Executor()
        res = executor.execute_graph(graph, keys=[chunk.key])[0]
        np.testing.assert_array_equal(res, fake_result)
        # with mock=True the graph is not executed; the registered size
        # estimator supplies the result instead
        size = executor.execute_graph(graph, keys=[chunk.key], mock=True)[0]
        self.assertEqual(size, fake_size)

        # a subclass with no handlers of its own falls back to those
        # registered for its parent class
        graph = DAG()
        chunk = SubFakeOperand().new_chunk(None, shape=(10, 10))
        graph.add_node(chunk.data)

        executor = Executor()
        res = executor.execute_graph(graph, keys=[chunk.key])[0]
        np.testing.assert_array_equal(res, fake_result)
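
The second half of this test passes because handler lookup walks the operand's MRO: a subclass with no registration of its own inherits its parent's handlers. A minimal, self-contained sketch of that dispatch pattern (plain Python, illustrative only, not the actual Mars internals):

_op_runners = {}

def register(op_cls, executor):
    _op_runners[op_cls] = executor

def dispatch(op):
    # walk the MRO so subclasses fall back to parent registrations
    for cls in type(op).__mro__:
        if cls in _op_runners:
            return _op_runners[cls]
    raise KeyError(f'no handler registered for {type(op).__name__}')

class FakeOperand:
    pass

class SubFakeOperand(FakeOperand):
    pass

register(FakeOperand, lambda ctx, op: None)
assert dispatch(SubFakeOperand()) is dispatch(FakeOperand())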
Example #2
        def execute_size(t):
            def _tensordot_size_recorder(ctx, op):
                TensorTensorDot.estimate_size(ctx, op)

                chunk_key = op.outputs[0].key
                chunk_sizes[chunk_key] = ctx[chunk_key]
                chunk_nbytes[chunk_key] = op.outputs[0].nbytes

                input_sizes = dict(
                    (inp.op.key, ctx[inp.key][0]) for inp in op.inputs)
                chunk_input_sizes[chunk_key] = sum(input_sizes.values())
                input_nbytes = dict(
                    (inp.op.key, inp.nbytes) for inp in op.inputs)
                chunk_input_nbytes[chunk_key] = sum(input_nbytes.values())

            size_executor = ExecutorForTest(
                sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
            try:
                chunk_sizes.clear()
                chunk_nbytes.clear()
                chunk_input_sizes.clear()
                chunk_input_nbytes.clear()
                register(TensorTensorDot,
                         size_estimator=_tensordot_size_recorder)
                size_executor.execute_tensor(t, mock=True)
            finally:
                # restore the built-in handlers for TensorTensorDot
                register_default(TensorTensorDot)
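
A hedged usage sketch for the helper above; `execute_size` and the four recording dicts are assumed to come from the enclosing test:

import mars.tensor as mt

a = mt.random.rand(6, 4, chunk_size=2)
b = mt.random.rand(4, 3, chunk_size=2)

t = a.dot(b)     # the graph of ``t`` contains TensorTensorDot chunks
execute_size(t)  # the mock run populates the recording dicts

# one size estimate was recorded per tensordot chunk
assert set(chunk_sizes) == set(chunk_nbytes)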
Example #3
    def testFetch(self):
        with tempfile.TemporaryDirectory() as tempdir:
            filename = os.path.join(tempdir, 'test_fetch.csv')
            pd_df = pd.DataFrame({
                'a': [3, 4, 5, 3, 5, 4, 1, 2, 3],
                'b': [1, 3, 4, 5, 6, 5, 4, 4, 4],
                'c': list('aabaaddce'),
                'd': list('abaaaddce')
            })
            pd_df.to_csv(filename, index=False)

            df = md.read_csv(filename)
            df2 = df.groupby('d').agg({'b': 'min'})
            expected = pd_df.groupby('d').agg({'b': 'min'})
            _ = df2.execute()

            def _execute_read_csv(*_):  # pragma: no cover
                raise ValueError('cannot run read_csv again')

            try:
                # fetch must reuse stored chunk results; if read_csv ran
                # again, the raising handler above would be triggered
                register(DataFrameReadCSV, _execute_read_csv)

                pd.testing.assert_frame_equal(df2.fetch(), expected)
                pd.testing.assert_frame_equal(df2.iloc[:3].fetch(),
                                              expected.iloc[:3])
            finally:
                del Executor._op_runners[DataFrameReadCSV]
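
The register-then-cleanup pair inside the `try`/`finally` above recurs throughout this page (see Examples #5 through #8). A reusable context-manager form might look like this (the helper name `forbid_op` is hypothetical):

import contextlib

from mars.executor import Executor, register

@contextlib.contextmanager
def forbid_op(op_cls, message):
    # temporarily replace the operand's executor with one that raises,
    # proving the operand never executes inside the block
    def _raise(*_):  # pragma: no cover
        raise ValueError(message)

    try:
        register(op_cls, _raise)
        yield
    finally:
        del Executor._op_runners[op_cls]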
Example #4
    def testFetch(self):
        sess = new_session()

        arr1 = mt.ones((10, 5), chunk_size=3)

        r1 = sess.run(arr1)
        r2 = sess.run(arr1)
        np.testing.assert_array_equal(r1, r2)

        # overwrite the first chunk's stored result to verify that
        # subsequent runs read from the chunk cache
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
            (3, 3)) * 2
        r3 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r3[:3, :3], np.ones((3, 3)) * 3)

        # rerun to ensure arr1's chunk results still exist
        r4 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r4[:3, :3], np.ones((3, 3)) * 3)

        # an identical expression produces identical chunk keys, so the
        # injected chunk result is reused for this new tensor as well
        arr2 = mt.ones((10, 5), chunk_size=3)
        r5 = sess.run(arr2)
        np.testing.assert_array_equal(r5[:3, :3], np.ones((3, 3)) * 2)

        r6 = sess.run(arr2 + 1)
        np.testing.assert_array_equal(r6[:3, :3], np.ones((3, 3)) * 3)

        # test fetch multiple tensors
        raw = np.random.rand(5, 10)
        arr1 = mt.ones((5, 10), chunk_size=5)
        arr2 = mt.tensor(raw, chunk_size=3)
        arr3 = mt.sum(arr2)

        sess.run(arr1, arr2, arr3)

        fetch1, fetch2, fetch3 = sess.fetch(arr1, arr2, arr3)
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        fetch1, fetch2, fetch3 = sess.fetch([arr1, arr2, arr3])
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        raw = np.random.rand(5, 10)
        arr = mt.tensor(raw, chunk_size=5)
        s = arr.sum()

        self.assertAlmostEqual(s.execute().fetch(), raw.sum())

        def _execute_ds(*_):  # pragma: no cover
            raise ValueError('cannot run random again')

        try:
            register(ArrayDataSource, _execute_ds)

            self.assertAlmostEqual(s.fetch(), raw.sum())
        finally:
            del Executor._op_runners[ArrayDataSource]
Example #5
    @contextlib.contextmanager  # decorator implied by the ``yield`` below
    def _inject_execute_data_source(limit, op_cls):
        def _execute_data_source(ctx, op):
            op_cls.execute(ctx, op)
            result = ctx[op.outputs[0].key]
            if len(result) > limit:
                raise RuntimeError(
                    'have data more than expected')  # pragma: no cover

        try:
            register(op_cls, _execute_data_source)
            yield
        finally:
            del Executor._op_runners[op_cls]
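
A hedged usage sketch, assuming the helper above is wrapped with `contextlib.contextmanager` (its `yield` implies it), that `df` was created with `md.read_csv`, and that `DataFrameReadCSV` is importable as in Example #3:

with _inject_execute_data_source(3, DataFrameReadCSV):
    df.head(3).execute()  # must not materialize more than 3 rows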
Example #6
    @contextlib.contextmanager  # decorator implied by the ``yield`` below
    def _raise_iloc(self):
        def _execute_iloc(*_):  # pragma: no cover
            raise ValueError('cannot run iloc')

        self.ctx.__enter__()
        try:
            register(DataFrameIlocGetItem, _execute_iloc)
            register(SeriesIlocGetItem, _execute_iloc)

            yield
        finally:
            del Executor._op_runners[DataFrameIlocGetItem]
            del Executor._op_runners[SeriesIlocGetItem]
            self.ctx.__exit__(None, None, None)
Example #7
    @contextlib.contextmanager  # decorator implied by the ``yield`` below
    def _inject_execute_data_source_mixed(limit, usecols, op_cls):
        def _execute_data_source(ctx, op):  # pragma: no cover
            op_cls.execute(ctx, op)
            result = ctx[op.outputs[0].key]
            if not isinstance(usecols, list):
                if not isinstance(result, pd.Series):
                    raise RuntimeError('Out data should be a Series')
            elif len(result.columns) > len(usecols):
                raise RuntimeError('have data more than expected')
            if len(result) > limit:
                raise RuntimeError('have data more than expected')

        try:
            register(op_cls, _execute_data_source)
            yield
        finally:
            del Executor._op_runners[op_cls]
Example #8
    @contextlib.contextmanager  # decorator implied by the ``yield`` below
    def _inject_execute_data_source_usecols(usecols, op_cls):
        def _execute_data_source(ctx, op):  # pragma: no cover
            op_cls.execute(ctx, op)
            result = ctx[op.outputs[0].key]
            if not isinstance(usecols, list):
                if not isinstance(result, pd.Series):
                    raise RuntimeError('Out data should be a Series, '
                                       f'got {type(result)}')
            elif len(result.columns) > len(usecols):
                params = dict((k, getattr(op, k, None)) for k in op._keys_
                              if k not in op._no_copy_attrs_)
                raise RuntimeError(
                    f'have data more than expected, got {result.columns}, '
                    f'result {result}, op params {params}')

        try:
            register(op_cls, _execute_data_source)
            yield
        finally:
            del Executor._op_runners[op_cls]
Example #9
    def post_create(self):
        register(FakeOperand, fake_execution_maker(self.ctx))
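
`fake_execution_maker` is defined outside this excerpt; a plausible minimal form, assumed here, is a factory that closes over the test context and returns an executor callable:

def fake_execution_maker(test_ctx):
    def _fake_execute(ctx, op):
        test_ctx['executed'] = True      # record that the operand ran
        ctx[op.outputs[0].key] = 'fake'  # store a placeholder result
    return _fake_execute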
Example #10
# (the matching ``try:`` that imports these names precedes this excerpt)
except ImportError:
    create_mars_cluster = None
    to_mars_dataframe = None
    persist_mars_dataframe = None
    run_mars_script = None
    run_mars_job = None
    list_mars_instances = None

try:
    from . import dataframe
except ImportError:
    dataframe = None

try:
    from . import tensor
except ImportError:
    tensor = None

try:
    from mars.executor import register
    from mars.remote.core import RemoteFunction
    from .core import execute_with_odps_context
    from .run_script import RunScript

    register(RemoteFunction, execute_with_odps_context(RemoteFunction.execute))
    register(RunScript, execute_with_odps_context(RunScript.execute))
except ImportError:
    pass

# matches internal-style endpoint segments such as ``/svc.region.api-internal.``
INTERNAL_PATTERN = r'\/[^\.]+\.[^\.-]+\.[^\.-]+\-[^\.-]+\.'
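
A hedged illustration of what the pattern matches; the URL below is made up:

import re

url = 'http://svc.region.api-internal.example.com/endpoint'
assert re.search(INTERNAL_PATTERN, url) is not None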
Example #11
try:
    import xgboost

    from mars.learn.contrib.xgboost.start_tracker import StartTracker
except ImportError:
    xgboost = None

if xgboost and os.environ.get('TEST_START_TRACKER') == '1':

    def _patch_start_tracker_estimator(ctx, op: StartTracker):
        # run the default estimator, then check its estimate against the
        # actual data size of the output
        op.estimate_size(ctx, op)
        estimated_size = ctx[op.outputs[0].key]
        assert estimated_size[0] == estimated_size[1] == calc_data_size(
            op.outputs[0])

    register(StartTracker, StartTracker.execute,
             _patch_start_tracker_estimator)


@unittest.skipIf(xgboost is None, 'xgboost not installed')
class Test(IntegrationTestBase):
    def setUp(self):
        n_rows = 1000
        n_columns = 10
        chunk_size = 20
        rs = mt.random.RandomState(0)
        self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
        self.y = rs.rand(n_rows, chunk_size=chunk_size)
        super().setUp()

    @property
    def _extra_worker_options(self):
        ...  # body truncated in this excerpt
Example #12
# Copyright 1999-2020 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from mars.dataframe.indexing.iloc import DataFrameIlocGetItem
from mars.executor import register


def _execute_iloc(*_):  # pragma: no cover
    raise ValueError('cannot run iloc')


register(DataFrameIlocGetItem, _execute_iloc)
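
A hedged usage sketch of this module's effect: once it has been imported, any iloc-based execution fails loudly, letting tests assert that an optimized plan avoided iloc entirely:

import pandas as pd

import mars.dataframe as md

df = md.DataFrame(pd.DataFrame({'a': [1, 2, 3]}))
df.iloc[:2].execute()  # raises ValueError: 'cannot run iloc'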