Example #1
    def testExecuteBothExecutedAndNot(self):
        data = np.random.random((5, 9))

        arr1 = mt.tensor(data, chunk_size=4) * 2
        arr2 = mt.tensor(data) + 1

        np.testing.assert_array_equal(arr2.to_numpy(), data + 1)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr2).chunks[0].key] = data + 2

        results = sess.run(arr1, arr2)
        np.testing.assert_array_equal(results[0], data * 2)
        np.testing.assert_array_equal(results[1], data + 2)
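
These execution tests all follow the same pattern: run a tensor first, then call get_tiled on the original (untiled) tensor to reach its chunk keys inside the executor and patch stored chunk results. A minimal standalone sketch of that flow, assuming the Session class is importable from mars.session and that the private _executor attribute behaves as in the tests above:

import numpy as np
import mars.tensor as mt
from mars.session import Session   # assumed import path
from mars.tiles import get_tiled

data = np.random.random((6, 6))
arr = mt.tensor(data, chunk_size=3) + 1
np.testing.assert_array_equal(arr.to_numpy(), data + 1)   # triggers tiling and execution

tiled = get_tiled(arr)                        # tiled counterpart created during execution
sess = Session.default_or_local()
executor = sess._sess._executor               # private executor, as used in the tests above
executor.chunk_result[tiled.chunks[0].key] = data[:3, :3] + 2   # overwrite one chunk result
print(arr.to_numpy()[:3, :3])                 # the re-run picks up the patched chunk
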
Example #2
    def testGraphDeviceAssigner(self):
        import mars.tensor as mt

        a = mt.random.rand(10, 10, chunk_size=5, gpu=True)
        b = a.sum(axis=1)
        graph = b.build_graph(tiled=True, compose=False)

        assigner = GraphDeviceAssigner(graph,
                                       list(n.op for n in graph.iter_indep()),
                                       devices=[0, 1])
        assigner.assign()

        a = get_tiled(a)
        self.assertEqual(a.cix[0, 0].device, a.cix[0, 1].device)
        self.assertEqual(a.cix[1, 0].device, a.cix[1, 1].device)
        self.assertNotEqual(a.cix[0, 0].device, a.cix[1, 0].device)
Example #3
    def _check_nsplits(self, tileable):
        from mars.tiles import get_tiled
        tiled = get_tiled(tileable)
        if tiled.nsplits == () and len(tiled.chunks) == 1:
            return

        nsplit_chunk_shape = tuple(len(s) for s in tiled.nsplits)
        if nsplit_chunk_shape != tiled.chunk_shape:
            raise AssertionError(
                'Operand %r: shape of nsplits %r not consistent with chunk shape %r'
                % (tiled.op, nsplit_chunk_shape, tiled.chunk_shape)) from None

        nsplit_shape = tuple(np.sum(s) for s in tiled.nsplits)
        try:
            self.assert_shape_consistent(nsplit_shape, tiled.shape)
        except AssertionError:
            raise AssertionError(
                'Operand %r: shape computed from nsplits %r -> %r not consistent with real shape %r'
                % (tiled.op, tiled.nsplits, nsplit_shape, tiled.shape)) from None

        for c in tiled.chunks:
            try:
                tiled_c = tiled.cix[c.index]
            except ValueError as ex:
                raise AssertionError(
                    'Operand %r: Malformed index %r, nsplits is %r. Raw error is %r'
                    % (c.op, c.index, tiled.nsplits, ex)) from None

            if tiled_c is not c:
                raise AssertionError(
                    'Operand %r: Cannot spot chunk via index %r, nsplits is %r'
                    % (c.op, c.index, tiled.nsplits))
        for cid, shape in enumerate(itertools.product(*tiled.nsplits)):
            chunk_shape = self._raw_chunk_shapes.get(
                tiled.chunks[cid].key) or tiled.chunks[cid].shape
            if len(shape) != len(chunk_shape):
                raise AssertionError(
                    'Operand %r: Shape in nsplits %r does not meet shape in chunk %r'
                    % (tiled.chunks[cid].op, shape, chunk_shape))
            for s1, s2 in zip(shape, chunk_shape):
                if (not (np.isnan(s1) and np.isnan(s2))) and s1 != s2:
                    raise AssertionError(
                        'Operand %r: Shape in nsplits %r does not meet shape in chunk %r'
                        % (tiled.chunks[cid].op, shape, chunk_shape))
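
The checker above relies on three invariants of a tiled tileable: nsplits lists the chunk extents per axis, their lengths form chunk_shape, and every chunk can be addressed through cix by its index tuple. A small illustration of those invariants, reusing only API calls that already appear in these examples:

import mars.tensor as mt

t = mt.ones((10, 9), chunk_size=4).tiles()   # tile eagerly to materialize chunk metadata

# nsplits holds the chunk extents per axis; their lengths give chunk_shape
assert t.nsplits == ((4, 4, 2), (4, 4, 1))
assert tuple(len(s) for s in t.nsplits) == t.chunk_shape == (3, 3)

# every chunk is reachable through cix by its index tuple
for c in t.chunks:
    assert t.cix[c.index] is c
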
Example #4
    def testErf(self):
        raw = np.random.rand(10, 8, 5)
        t = tensor(raw, chunk_size=3)

        r = erf(t)
        expect = scipy_erf(raw)

        self.assertEqual(r.shape, raw.shape)
        self.assertEqual(r.dtype, expect.dtype)

        r = r.tiles()
        t = get_tiled(t)

        self.assertEqual(r.nsplits, t.nsplits)
        for c in r.chunks:
            self.assertIsInstance(c.op, TensorErf)
            self.assertEqual(c.index, c.inputs[0].index)
            self.assertEqual(c.shape, c.inputs[0].shape)
Example #5
    def run_simple_calc(self, session_id):
        self._session_id = session_id

        import mars.tensor as mt
        arr = mt.ones((4,), chunk_size=4) + 1
        graph = arr.build_graph(compose=False, tiled=True)

        arr = get_tiled(arr)
        self._array_key = arr.chunks[0].key

        graph_key = self._graph_key = str(uuid.uuid4())
        execution_ref = self.promise_ref(ExecutionActor.default_uid())
        execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                    dict(chunks=[arr.chunks[0].key]), None, _tell=True)

        execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
            .then(lambda *_: self._results.append((True,))) \
            .catch(lambda *exc: self._results.append((False, exc)))
Example #6
    def testDataFrameExecuteNotFetch(self):
        data1 = pd.DataFrame(np.random.random((5, 4)), columns=list('abcd'))
        sess = Session.default_or_local()

        df1 = md.DataFrame(data1, chunk_size=2)

        with self.assertRaises(ValueError):
            sess.fetch(df1)

        self.assertIs(df1.execute(), df1)

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(df1).chunks[0].key] = data1.iloc[:2, :2] * 3

        expected = data1
        expected.iloc[:2, :2] = data1.iloc[:2, :2] * 3

        pd.testing.assert_frame_equal(df1.to_pandas(), expected)
Example #7
    def testTensorExecuteNotFetch(self):
        data = np.random.random((5, 9))
        sess = Session.default_or_local()

        arr1 = mt.tensor(data, chunk_size=2) * 2

        with self.assertRaises(ValueError):
            sess.fetch(arr1)

        self.assertIs(arr1.execute(), arr1)

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = data[:2, :2] * 3

        expected = data * 2
        expected[:2, :2] = data[:2, :2] * 3

        np.testing.assert_array_equal(arr1.to_numpy(), expected)
Example #8
    def testSliceTiles(self):
        t = ones((100, 200, 300), chunk_size=30)
        t2 = t[10:40, 199:, -30:303]
        t2 = t2.tiles()
        t = get_tiled(t)

        self.assertEqual(t2.chunk_shape, (2, 1, 1))
        self.assertEqual(t2.chunks[0].inputs[0], t.cix[0, -1, -1].data)
        self.assertEqual(
            t2.chunks[0].op.indexes,
            [slice(10, 30, 1), slice(19, 20, 1),
             slice(None)])
        self.assertEqual(t2.chunks[0].index, (0, 0, 0))
        self.assertEqual(t2.chunks[1].inputs[0], t.cix[1, -1, -1].data)
        self.assertEqual(
            t2.chunks[1].op.indexes,
            [slice(0, 10, 1), slice(19, 20, 1),
             slice(None)])
        self.assertEqual(t2.chunks[1].index, (1, 0, 0))
Example #9
    def testFetch(self):
        sess = new_session()

        arr1 = mt.ones((10, 5), chunk_size=3)

        r1 = sess.run(arr1)
        r2 = sess.run(arr1)
        np.testing.assert_array_equal(r1, r2)

        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
            (3, 3)) * 2
        r3 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r3[:3, :3], np.ones((3, 3)) * 3)

        # rerun to ensure arr1's chunk results still exist
        r4 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r4[:3, :3], np.ones((3, 3)) * 3)

        arr2 = mt.ones((10, 5), chunk_size=3)
        r5 = sess.run(arr2)
        np.testing.assert_array_equal(r5[:3, :3], np.ones((3, 3)) * 2)

        r6 = sess.run(arr2 + 1)
        np.testing.assert_array_equal(r6[:3, :3], np.ones((3, 3)) * 3)

        # test fetch multiple tensors
        raw = np.random.rand(5, 10)
        arr1 = mt.ones((5, 10), chunk_size=5)
        arr2 = mt.tensor(raw, chunk_size=3)
        arr3 = mt.sum(arr2)

        sess.run(arr1, arr2, arr3)

        fetch1, fetch2, fetch3 = sess.fetch(arr1, arr2, arr3)
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        fetch1, fetch2, fetch3 = sess.fetch([arr1, arr2, arr3])
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())
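
The fetch test shows that sess.run accepts several tileables at once and that sess.fetch returns previously computed results without recomputation. A condensed sketch of that workflow, assuming new_session is importable from mars.session as in typical Mars test code:

import numpy as np
import mars.tensor as mt
from mars.session import new_session   # assumed import path

sess = new_session()

raw = np.random.rand(5, 10)
a = mt.ones((5, 10), chunk_size=5)
b = mt.tensor(raw, chunk_size=3)
c = mt.sum(b)

sess.run(a, b, c)                 # execute all three tileables in one call

fa, fb, fc = sess.fetch(a, b, c)  # fetch stored results, no recomputation
np.testing.assert_array_equal(fa, np.ones((5, 10)))
np.testing.assert_array_equal(fb, raw)
np.testing.assert_almost_equal(fc, raw.sum())
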
Example #10
    def testReExecuteExisting(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
            pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

            import mars.tensor as mt
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            result_tensor = get_tiled(result_tensor)

            def _validate(_):
                data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4,)))

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #11
    def testDistributedTile(self):
        X, y, w = self.X, self.y, self.weight

        X = X.tiles()
        y = y.tiles()
        w = w.tiles()

        workers = ['addr1:1', 'addr2:1']
        chunk_to_workers = dict()
        X_chunk_to_workers = {c.key: workers[i % 2] for i, c in enumerate(X.chunks)}
        chunk_to_workers.update(X_chunk_to_workers)
        y_chunk_to_workers = {c.key: workers[i % 2] for i, c in enumerate(y.chunks)}
        chunk_to_workers.update(y_chunk_to_workers)
        w_chunk_to_workers = {c.key: workers[i % 2] for i, c in enumerate(w.chunks)}
        chunk_to_workers.update(w_chunk_to_workers)

        class MockDistributedDictContext(ContextBase):
            @property
            def running_mode(self):
                return RunningMode.distributed

            def get_chunk_metas(self, chunk_keys):
                metas = []
                for ck in chunk_keys:
                    if ck in chunk_to_workers:
                        metas.append(ChunkMeta(chunk_size=None, chunk_shape=None,
                                               workers=[chunk_to_workers[ck]]))
                    else:
                        metas.append(ChunkMeta(chunk_size=None, chunk_shape=None,
                                               workers=None))
                return metas

        dmatrix = ToDMatrix(data=X, label=y, weight=w)()
        model = XGBTrain(dtrain=dmatrix)()

        with MockDistributedDictContext():
            model = model.tiles()
            dmatrix = get_tiled(dmatrix)

            # 2 workers
            self.assertEqual(len(dmatrix.chunks), 2)
            self.assertEqual(len(model.chunks), 2)
Example #12
    def testReExecuteSame(self):
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.to_numpy()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with a single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.to_numpy()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2

        result3 = arr4.to_numpy()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.to_numpy()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.to_numpy()

        np.testing.assert_array_equal(result1, result2)
Example #13
    def run_test(self, worker, calc_device=None):
        import mars.tensor as mt
        from mars.worker import ExecutionActor

        session_id = str(uuid.uuid4())

        gpu = calc_device in ('cuda',)
        a = mt.random.rand(100, 50, chunk_size=30, gpu=gpu)
        b = mt.random.rand(50, 200, chunk_size=30, gpu=gpu)
        result = a.dot(b)

        graph = result.build_graph(tiled=True)
        result = get_tiled(result)

        executor_ref = self.promise_ref(ExecutionActor.default_uid(), address=worker)
        io_meta = dict(chunks=[c.key for c in result.chunks])

        graph_key = str(id(graph))
        executor_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                   io_meta, None, calc_device=calc_device, _promise=True) \
            .then(lambda *_: setattr(self, '_replied', True))
Example #14
    def testRechunk(self):
        tensor = ones((12, 9), chunk_size=4)
        new_tensor = tensor.rechunk(3)
        new_tensor = new_tensor.tiles()

        self.assertEqual(len(new_tensor.chunks), 12)
        self.assertEqual(new_tensor.chunks[0].inputs[0],
                         get_tiled(tensor).chunks[0].data)
        self.assertEqual(len(new_tensor.chunks[1].inputs), 2)
        self.assertEqual(
            new_tensor.chunks[1].inputs[0].op.slices,
            [slice(None, 3, None), slice(3, None, None)])
        self.assertEqual(
            new_tensor.chunks[1].inputs[1].op.slices,
            [slice(None, 3, None), slice(None, 2, None)])
        self.assertEqual(len(new_tensor.chunks[-1].inputs), 2)
        self.assertEqual(
            new_tensor.chunks[-1].inputs[0].op.slices,
            [slice(1, None, None), slice(2, None, None)])
        self.assertEqual(new_tensor.chunks[-1].inputs[1].op.slices,
                         [slice(1, None, None),
                          slice(None, None, None)])
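
Tiling the rechunked tensor also tiles its source, so the source's chunk layout can be inspected afterwards with get_tiled. A short sketch mirroring the shapes used above; the printed nsplits follow from shape (12, 9) with chunk sizes 4 and 3:

import mars.tensor as mt
from mars.tiles import get_tiled

t = mt.ones((12, 9), chunk_size=4)
r = t.rechunk(3).tiles()   # tiling r tiles the whole graph, including t

src = get_tiled(t)         # tiled counterpart of the source tensor
print(src.nsplits)         # ((4, 4, 4), (4, 4, 1))
print(r.nsplits)           # ((3, 3, 3, 3), (3, 3, 3))
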
Example #15
    def testToCPU(self):
        data = pd.DataFrame(np.random.rand(10, 10), index=np.random.randint(-100, 100, size=(10,)),
                            columns=[np.random.bytes(10) for _ in range(10)])
        df = from_pandas_df(data)
        cdf = to_gpu(df)
        df2 = to_cpu(cdf)

        self.assertEqual(df.index_value, df2.index_value)
        self.assertEqual(df.columns_value, df2.columns_value)
        self.assertFalse(df2.op.gpu)
        pd.testing.assert_series_equal(df.dtypes, df2.dtypes)

        df2 = df2.tiles()
        df = get_tiled(df)

        self.assertEqual(df.nsplits, df2.nsplits)
        self.assertEqual(df.chunks[0].index_value, df2.chunks[0].index_value)
        self.assertEqual(df.chunks[0].columns_value, df2.chunks[0].columns_value)
        self.assertFalse(df2.chunks[0].op.gpu)
        pd.testing.assert_series_equal(df.chunks[0].dtypes, df2.chunks[0].dtypes)

        self.assertIs(df2, to_cpu(df2))
Example #16
    def testIterativeTiling(self):
        sess = new_session()

        rs = np.random.RandomState(0)
        raw = rs.rand(100)
        a = mt.tensor(raw, chunk_size=10)
        a.sort()
        c = a[:5]

        ret = sess.run(c)
        np.testing.assert_array_equal(ret, np.sort(raw)[:5])

        executor = sess._sess.executor
        self.assertEqual(len(executor.chunk_result), 1)
        executor.chunk_result.clear()

        raw1 = rs.rand(20)
        raw2 = rs.rand(20)
        a = mt.tensor(raw1, chunk_size=10)
        a.sort()
        b = mt.tensor(raw2, chunk_size=15) + 1
        c = mt.concatenate([a[:10], b])
        c.sort()
        d = c[:5]

        ret = sess.run(d)
        expected = np.sort(np.concatenate([np.sort(raw1)[:10], raw2 + 1]))[:5]
        np.testing.assert_array_equal(ret, expected)
        self.assertEqual(len(executor.chunk_result), len(get_tiled(d).chunks))

        raw = rs.rand(100)
        a = mt.tensor(raw, chunk_size=10)
        a.sort()
        b = a + 1
        c = b[:5]

        ret = sess.run([b, c])
        expected = np.sort(raw + 1)[:5]
        np.testing.assert_array_equal(ret[1], expected)
Example #17
    def testSendTargets(self):
        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)
            result_tensor = get_tiled(result_tensor)
            result_key = result_tensor.chunks[0].key

            pool.create_actor(MockSenderActor, [mock_data + np.ones((4,))], 'out', uid='w:mock_sender')
            with self.run_actor_test(pool) as test_actor:
                def _validate(*_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)
                execution_ref.send_data_to_workers(
                    session_id, graph_key, {result_key: (pool_address,)}, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #18
    def testEstimateGraphFinishTime(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)

            status_ref = pool.actor_ref(StatusActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((10, 8), chunk_size=10)
            graph = arr.build_graph(compose=False, tiled=True)

            arr = get_tiled(arr)

            graph_key = str(uuid.uuid4())

            for _ in range(options.optimize.min_stats_count + 1):
                status_ref.update_mean_stats(
                    'calc_speed.' + type(arr.chunks[0].op).__name__, 10)
                status_ref.update_mean_stats('disk_read_speed', 10)
                status_ref.update_mean_stats('disk_write_speed', 10)
                status_ref.update_mean_stats('net_transfer_speed', 10)

            execution_ref.execute_graph(session_id, graph_key,
                                        serialize_graph(graph),
                                        dict(chunks=[arr.chunks[0].key]), None)
            execution_ref.estimate_graph_finish_time(session_id, graph_key)

            stats_dict = status_ref.get_stats(
                ['min_est_finish_time', 'max_est_finish_time'])
            self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
            self.assertIsNotNone(stats_dict.get('max_est_finish_time'))
Example #19
    def testConcatenate(self):
        a = ones((10, 20, 30), chunk_size=10)
        b = ones((20, 20, 30), chunk_size=20)

        c = concatenate([a, b])
        self.assertEqual(c.shape, (30, 20, 30))

        a = ones((10, 20, 30), chunk_size=10)
        b = ones((10, 20, 40), chunk_size=20)

        c = concatenate([a, b], axis=-1)
        self.assertEqual(c.shape, (10, 20, 70))

        with self.assertRaises(ValueError):
            a = ones((10, 20, 30), chunk_size=10)
            b = ones((20, 30, 30), chunk_size=20)

            concatenate([a, b])

        with self.assertRaises(ValueError):
            a = ones((10, 20, 30), chunk_size=10)
            b = ones((20, 20), chunk_size=20)

            concatenate([a, b])

        a = ones((10, 20, 30), chunk_size=5)
        b = ones((20, 20, 30), chunk_size=10)

        c = concatenate([a, b]).tiles()
        a = get_tiled(a)
        self.assertEqual(c.chunk_shape[0], 4)
        self.assertEqual(c.chunk_shape[1], 4)
        self.assertEqual(c.chunk_shape[2], 6)
        self.assertEqual(c.nsplits, ((5, 5, 10, 10), (5, ) * 4, (5, ) * 6))
        self.assertEqual(c.cix[0, 0, 0].key, a.cix[0, 0, 0].key)
        self.assertEqual(c.cix[1, 0, 0].key, a.cix[1, 0, 0].key)
Example #20
    def testFetchRemoteData(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234',))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas[modified_chunk.key] = WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #21
    def testPrepareSpilled(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(IORunnerActor)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', pool_address))}

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                self.waitp(
                    test_actor.storage_client.put_objects(
                            session_id, [modified_chunk.key], [mock_data], [DataStorageDevice.PROC_MEMORY])
                        .then(lambda *_: test_actor.storage_client.copy_to(
                            session_id, [modified_chunk.key], [DataStorageDevice.DISK]))
                )
                test_actor.storage_client.delete(session_id, [modified_chunk.key],
                                                 [DataStorageDevice.PROC_MEMORY])

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #22
    def testPrepareQuota(self, *_):
        pinned = True

        orig_pin = SharedHolderActor.pin_data_keys

        def _mock_pin(self, session_id, chunk_keys, token):
            from mars.errors import PinDataKeyFailed
            if pinned:
                raise PinDataKeyFailed
            return orig_pin(self, session_id, chunk_keys, token)

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
                create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.actor_ref(WorkerClusterInfoActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    nonlocal pinned
                    time.sleep(0.5)
                    pinned = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
Example #23
    def testSimpleExecution(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.datasource import TensorOnes
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(compose=False, tiled=True)

            arr = get_tiled(arr)
            arr2 = get_tiled(arr2)

            metas = dict()
            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype, _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)
                    metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape, pool_address)

            with self.run_actor_test(pool) as test_actor:
                session_id = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                self.waitp(
                    storage_client.put_objects(session_id, [arr.chunks[0].key], [np.ones((10, 8), dtype=np.int16)],
                                               [DataStorageDevice.SHARED_MEMORY]),
                )

                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                graph_key = str(uuid.uuid4())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #24
    def testRechunk(self):
        df = from_pandas_df(pd.DataFrame(np.random.rand(10, 10)), chunk_size=3)
        df2 = df.rechunk(4).tiles()

        self.assertEqual(df2.shape, (10, 10))
        self.assertEqual(len(df2.chunks), 9)

        self.assertEqual(df2.chunks[0].shape, (4, 4))
        pd.testing.assert_index_equal(df2.chunks[0].index_value.to_pandas(),
                                      pd.RangeIndex(4))
        pd.testing.assert_index_equal(df2.chunks[0].columns_value.to_pandas(),
                                      pd.RangeIndex(4))

        self.assertEqual(df2.chunks[2].shape, (4, 2))
        pd.testing.assert_index_equal(df2.chunks[2].index_value.to_pandas(),
                                      pd.RangeIndex(4))
        pd.testing.assert_index_equal(df2.chunks[2].columns_value.to_pandas(),
                                      pd.RangeIndex(8, 10))

        self.assertEqual(df2.chunks[-1].shape, (2, 2))
        pd.testing.assert_index_equal(df2.chunks[-1].index_value.to_pandas(),
                                      pd.RangeIndex(8, 10))
        pd.testing.assert_index_equal(df2.chunks[-1].columns_value.to_pandas(),
                                      pd.RangeIndex(8, 10))

        columns = [np.random.bytes(10) for _ in range(10)]
        index = np.random.randint(-100, 100, size=(4, ))
        data = pd.DataFrame(np.random.rand(4, 10),
                            index=index,
                            columns=columns)
        df = from_pandas_df(data, chunk_size=3)
        df2 = df.rechunk(6).tiles()

        self.assertEqual(df2.shape, (4, 10))
        self.assertEqual(len(df2.chunks), 2)

        self.assertEqual(df2.chunks[0].shape, (4, 6))
        pd.testing.assert_index_equal(df2.chunks[0].index_value.to_pandas(),
                                      df.index_value.to_pandas())
        pd.testing.assert_index_equal(df2.chunks[0].columns_value.to_pandas(),
                                      pd.Index(columns[:6]))

        self.assertEqual(df2.chunks[1].shape, (4, 4))
        pd.testing.assert_index_equal(df2.chunks[1].index_value.to_pandas(),
                                      df.index_value.to_pandas())
        pd.testing.assert_index_equal(df2.chunks[1].columns_value.to_pandas(),
                                      pd.Index(columns[6:]))

        # test Series rechunk
        series = from_pandas_series(pd.Series(np.random.rand(10, )),
                                    chunk_size=3)
        series2 = series.rechunk(4).tiles()

        self.assertEqual(series2.shape, (10, ))
        self.assertEqual(len(series2.chunks), 3)
        pd.testing.assert_index_equal(series2.index_value.to_pandas(),
                                      pd.RangeIndex(10))

        self.assertEqual(series2.chunk_shape, (3, ))
        self.assertEqual(series2.nsplits, ((4, 4, 2), ))
        self.assertEqual(series2.chunks[0].shape, (4, ))
        pd.testing.assert_index_equal(
            series2.chunks[0].index_value.to_pandas(), pd.RangeIndex(4))
        self.assertEqual(series2.chunks[1].shape, (4, ))
        pd.testing.assert_index_equal(
            series2.chunks[1].index_value.to_pandas(), pd.RangeIndex(4, 8))
        self.assertEqual(series2.chunks[2].shape, (2, ))
        pd.testing.assert_index_equal(
            series2.chunks[2].index_value.to_pandas(), pd.RangeIndex(8, 10))

        series2 = series.rechunk(1).tiles()

        self.assertEqual(series2.shape, (10, ))
        self.assertEqual(len(series2.chunks), 10)
        pd.testing.assert_index_equal(series2.index_value.to_pandas(),
                                      pd.RangeIndex(10))

        self.assertEqual(series2.chunk_shape, (10, ))
        self.assertEqual(series2.nsplits, ((1, ) * 10, ))
        self.assertEqual(series2.chunks[0].shape, (1, ))
        pd.testing.assert_index_equal(
            series2.chunks[0].index_value.to_pandas(), pd.RangeIndex(1))

        # no need to rechunk
        series2 = series.rechunk(3).tiles()
        series = get_tiled(series)
        self.assertEqual(series2.chunk_shape, series.chunk_shape)
        self.assertEqual(series2.nsplits, series.nsplits)
Example #25
    def testLU(self):
        a = mt.random.randint(1, 10, (6, 6), chunk_size=3)
        p, l_, u = mt.linalg.lu(a)

        l_ = l_.tiles()
        p, u = get_tiled(p), get_tiled(u)

        self.assertEqual(l_.shape, (6, 6))
        self.assertEqual(u.shape, (6, 6))
        self.assertEqual(p.shape, (6, 6))

        a = mt.random.randint(1, 10, (6, 6), chunk_size=(3, 2))
        p, l_, u = mt.linalg.lu(a)
        l_ = l_.tiles()
        p, u = get_tiled(p), get_tiled(u)

        self.assertEqual(l_.shape, (6, 6))
        self.assertEqual(u.shape, (6, 6))
        self.assertEqual(p.shape, (6, 6))

        self.assertEqual(p.nsplits, ((3, 3), (3, 3)))
        self.assertEqual(l_.nsplits, ((3, 3), (3, 3)))
        self.assertEqual(u.nsplits, ((3, 3), (3, 3)))

        a = mt.random.randint(1, 10, (7, 7), chunk_size=4)
        p, l_, u = mt.linalg.lu(a)
        l_ = l_.tiles()
        p, u = get_tiled(p), get_tiled(u)

        self.assertEqual(l_.shape, (7, 7))
        self.assertEqual(u.shape, (7, 7))
        self.assertEqual(p.shape, (7, 7))

        self.assertEqual(p.nsplits, ((4, 3), (4, 3)))
        self.assertEqual(l_.nsplits, ((4, 3), (4, 3)))
        self.assertEqual(u.nsplits, ((4, 3), (4, 3)))

        a = mt.random.randint(1, 10, (7, 5), chunk_size=4)
        p, l_, u = mt.linalg.lu(a)
        l_ = l_.tiles()
        p, u = get_tiled(p), get_tiled(u)

        self.assertEqual(l_.shape, (7, 5))
        self.assertEqual(u.shape, (5, 5))
        self.assertEqual(p.shape, (7, 7))

        a = mt.random.randint(1, 10, (5, 7), chunk_size=4)
        p, l_, u = mt.linalg.lu(a)
        l_ = l_.tiles()
        p, u = get_tiled(p), get_tiled(u)

        self.assertEqual(l_.shape, (5, 5))
        self.assertEqual(u.shape, (5, 7))
        self.assertEqual(p.shape, (5, 5))

        # test sparse
        data = sps.csr_matrix([[2, 0, 0, 0, 5, 2], [0, 6, 1, 0, 0, 6],
                               [8, 0, 9, 0, 0, 2], [0, 6, 0, 8, 7, 3],
                               [7, 0, 6, 1, 7, 0], [0, 0, 0, 7, 0, 8]])
        t = mt.tensor(data, chunk_size=3)
        p, l_, u = mt.linalg.lu(t)

        self.assertTrue(p.op.sparse)
        self.assertIsInstance(p, SparseTensor)
        self.assertTrue(l_.op.sparse)
        self.assertIsInstance(l_, SparseTensor)
        self.assertTrue(u.op.sparse)
        self.assertIsInstance(u, SparseTensor)

        p = p.tiles()
        l_, u = get_tiled(l_), get_tiled(u)
        self.assertTrue(all(c.is_sparse() for c in p.chunks))
        self.assertTrue(all(c.is_sparse() for c in l_.chunks))
        self.assertTrue(all(c.is_sparse() for c in u.chunks))
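
The decomposition tests (LU here, QR and SVD below) share one idiom: tiling a single output tiles the whole multi-output operand, after which the sibling outputs are retrieved with get_tiled. A minimal sketch of that idiom, mirroring the first LU case above:

import mars.tensor as mt
from mars.tiles import get_tiled

a = mt.random.randint(1, 10, (6, 6), chunk_size=3)
p, l_, u = mt.linalg.lu(a)

l_ = l_.tiles()                      # tiling one output tiles the whole LU operand
p, u = get_tiled(p), get_tiled(u)    # sibling outputs are then reachable via get_tiled

print(p.nsplits, l_.nsplits, u.nsplits)   # each ((3, 3), (3, 3)), as asserted above
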
Example #26
    def testFetch(self):
        sess = new_session()

        arr1 = mt.ones((10, 5), chunk_size=3)

        r1 = sess.run(arr1)
        r2 = sess.run(arr1)
        np.testing.assert_array_equal(r1, r2)

        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
            (3, 3)) * 2
        r3 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r3[:3, :3], np.ones((3, 3)) * 3)

        # rerun to ensure arr1's chunk results still exist
        r4 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r4[:3, :3], np.ones((3, 3)) * 3)

        arr2 = mt.ones((10, 5), chunk_size=3)
        r5 = sess.run(arr2)
        np.testing.assert_array_equal(r5[:3, :3], np.ones((3, 3)) * 2)

        r6 = sess.run(arr2 + 1)
        np.testing.assert_array_equal(r6[:3, :3], np.ones((3, 3)) * 3)

        df = md.DataFrame(np.random.rand(10, 2), columns=list('ab'))
        s = df['a'].map(lambda x: np.ones((3, 3)), dtype='object').sum()

        np.testing.assert_array_equal(s.execute().fetch(),
                                      np.ones((3, 3)) * 10)

        # test fetch multiple tensors
        raw = np.random.rand(5, 10)
        arr1 = mt.ones((5, 10), chunk_size=5)
        arr2 = mt.tensor(raw, chunk_size=3)
        arr3 = mt.sum(arr2)

        sess.run(arr1, arr2, arr3)

        fetch1, fetch2, fetch3 = sess.fetch(arr1, arr2, arr3)
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        fetch1, fetch2, fetch3 = sess.fetch([arr1, arr2, arr3])
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        raw = np.random.rand(5, 10)
        arr = mt.tensor(raw, chunk_size=5)
        s = arr.sum()

        self.assertAlmostEqual(s.execute().fetch(), raw.sum())

        def _execute_ds(*_):  # pragma: no cover
            raise ValueError('cannot run random again')

        try:
            register(ArrayDataSource, _execute_ds)

            self.assertAlmostEqual(s.fetch(), raw.sum())
        finally:
            del Executor._op_runners[ArrayDataSource]
Example #27
    def testReExecuteSame(self):
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.to_numpy()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with a single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.to_numpy()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2

        result3 = arr4.to_numpy()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.to_numpy()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test copy; make sure it does not cause the execution cache to be missed
        df = md.DataFrame(mt.ones((10, 3), chunk_size=5))
        executed = [False]

        def add_one(x):
            if executed[0]:  # pragma: no cover
                raise ValueError('executed before')
            return x + 1

        df2 = df.apply(add_one)
        pd.testing.assert_frame_equal(df2.to_pandas(),
                                      pd.DataFrame(np.ones((10, 3)) + 1))

        executed[0] = True

        df3 = df2.copy()
        df4 = df3 * 2
        pd.testing.assert_frame_equal(df4.to_pandas(),
                                      pd.DataFrame(np.ones((10, 3)) * 4))
Example #28
    def testQR(self):
        a = mt.random.rand(9, 6, chunk_size=(3, 6))
        q, r = mt.linalg.qr(a)

        self.assertEqual(q.shape, (9, 6))
        self.assertEqual(r.shape, (6, 6))

        q = q.tiles()
        r = get_tiled(r)

        self.assertEqual(len(q.chunks), 3)
        self.assertEqual(len(r.chunks), 1)
        self.assertEqual(q.nsplits, ((3, 3, 3), (6, )))
        self.assertEqual(r.nsplits, ((6, ), (6, )))

        self.assertEqual(q.chunks[0].shape, (3, 6))
        self.assertEqual(q.chunks[0].inputs[0].shape, (3, 3))
        self.assertEqual(q.chunks[0].inputs[1].shape, (3, 6))

        a = mt.random.rand(18, 6, chunk_size=(9, 6))
        q, r = mt.linalg.qr(a)

        self.assertEqual(q.shape, (18, 6))
        self.assertEqual(r.shape, (6, 6))

        q = q.tiles()
        r = get_tiled(r)

        self.assertEqual(len(q.chunks), 2)
        self.assertEqual(len(r.chunks), 1)
        self.assertEqual(q.nsplits, ((9, 9), (6, )))
        self.assertEqual(r.nsplits, ((6, ), (6, )))

        self.assertEqual(q.chunks[0].shape, (9, 6))
        self.assertEqual(q.chunks[0].inputs[0].shape, (9, 6))
        self.assertEqual(q.chunks[0].inputs[1].shape, (6, 6))

        # for Short-and-Fat QR
        a = mt.random.rand(6, 18, chunk_size=(6, 6))
        q, r = mt.linalg.qr(a, method='sfqr')

        self.assertEqual(q.shape, (6, 6))
        self.assertEqual(r.shape, (6, 18))

        q = q.tiles()
        r = get_tiled(r)

        self.assertEqual(len(q.chunks), 1)
        self.assertEqual(len(r.chunks), 3)
        self.assertEqual(q.nsplits, ((6, ), (6, )))
        self.assertEqual(r.nsplits, ((6, ), (6, 6, 6)))

        # chunk width less than height
        a = mt.random.rand(6, 9, chunk_size=(6, 3))
        q, r = mt.linalg.qr(a, method='sfqr')

        self.assertEqual(q.shape, (6, 6))
        self.assertEqual(r.shape, (6, 9))

        q = q.tiles()
        r = get_tiled(r)

        self.assertEqual(len(q.chunks), 1)
        self.assertEqual(len(r.chunks), 2)
        self.assertEqual(q.nsplits, ((6, ), (6, )))
        self.assertEqual(r.nsplits, ((6, ), (6, 3)))

        a = mt.random.rand(9, 6, chunk_size=(9, 3))
        q, r = mt.linalg.qr(a, method='sfqr')

        self.assertEqual(q.shape, (9, 6))
        self.assertEqual(r.shape, (6, 6))

        q = q.tiles()
        r = get_tiled(r)

        self.assertEqual(len(q.chunks), 1)
        self.assertEqual(len(r.chunks), 1)
        self.assertEqual(q.nsplits, ((9, ), (6, )))
        self.assertEqual(r.nsplits, ((6, ), (6, )))
Example #29
    def testStoreHDF5Execution(self):
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t1 = tensor(raw, chunk_size=20)
        t2 = tensor(raw, chunk_size=9)

        with self.assertRaises(TypeError):
            tohdf5(object(), t2)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            with tempfile.TemporaryDirectory() as d:
                filename = os.path.join(d, 'test_store_{}.hdf5'.format(int(time.time())))

                # test 1 chunk
                r = tohdf5(filename, t1, group=group_name, dataset=dataset_name)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # test filename
                r = tohdf5(filename, t2, group=group_name, dataset=dataset_name)

                executor.execute_tensor(r)

                rt = get_tiled(r)
                self.assertEqual(type(rt.chunks[0].inputs[1].op).__name__, 'SuccessorsExclusive')
                self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                with self.assertRaises(ValueError):
                    tohdf5(filename, t2)

                with h5py.File(filename, 'r') as f:
                    # test file
                    r = tohdf5(f, t2, group=group_name, dataset=dataset_name)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                with self.assertRaises(ValueError):
                    with h5py.File(filename, 'r') as f:
                        tohdf5(f, t2)

                with h5py.File(filename, 'r') as f:
                    # test dataset
                    ds = f['{}/{}'.format(group_name, dataset_name)]
                    # test file
                    r = tohdf5(ds, t2)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)
Example #30
    def testSVD(self):
        a = mt.random.rand(9, 6, chunk_size=(3, 6))
        U, s, V = mt.linalg.svd(a)

        self.assertEqual(U.shape, (9, 6))
        self.assertEqual(s.shape, (6, ))
        self.assertEqual(V.shape, (6, 6))

        U = U.tiles()
        s, V = get_tiled(s), get_tiled(V)

        self.assertEqual(len(U.chunks), 3)
        self.assertEqual(U.chunks[0].shape, (3, 6))
        self.assertEqual(len(s.chunks), 1)
        self.assertEqual(s.chunks[0].shape, (6, ))
        self.assertEqual(len(V.chunks), 1)
        self.assertEqual(V.chunks[0].shape, (6, 6))

        self.assertEqual(U.chunks[0].inputs[0].shape, (3, 6))
        self.assertEqual(U.chunks[0].inputs[0].inputs[0].shape, (3, 3))
        self.assertEqual(U.chunks[0].inputs[0].inputs[1].shape, (3, 6))

        self.assertEqual(s.ndim, 1)
        self.assertEqual(len(s.chunks[0].index), 1)

        a = mt.random.rand(9, 6, chunk_size=(9, 6))
        U, s, V = mt.linalg.svd(a)

        self.assertEqual(U.shape, (9, 6))
        self.assertEqual(s.shape, (6, ))
        self.assertEqual(V.shape, (6, 6))

        U = U.tiles()
        s, V = get_tiled(s), get_tiled(V)

        self.assertEqual(len(U.chunks), 1)
        self.assertEqual(U.chunks[0].shape, (9, 6))
        self.assertEqual(len(s.chunks), 1)
        self.assertEqual(s.chunks[0].shape, (6, ))
        self.assertEqual(len(V.chunks), 1)
        self.assertEqual(V.chunks[0].shape, (6, 6))

        self.assertEqual(s.ndim, 1)
        self.assertEqual(len(s.chunks[0].index), 1)

        a = mt.random.rand(6, 20, chunk_size=10)
        U, s, V = mt.linalg.svd(a)

        self.assertEqual(U.shape, (6, 6))
        self.assertEqual(s.shape, (6, ))
        self.assertEqual(V.shape, (6, 20))

        U = U.tiles()
        s, V = get_tiled(s), get_tiled(V)

        self.assertEqual(len(U.chunks), 1)
        self.assertEqual(U.chunks[0].shape, (6, 6))
        self.assertEqual(len(s.chunks), 1)
        self.assertEqual(s.chunks[0].shape, (6, ))
        self.assertEqual(len(V.chunks), 1)
        self.assertEqual(V.chunks[0].shape, (6, 20))

        a = mt.random.rand(6, 9, chunk_size=(6, 9))
        U, s, V = mt.linalg.svd(a)

        self.assertEqual(U.shape, (6, 6))
        self.assertEqual(s.shape, (6, ))
        self.assertEqual(V.shape, (6, 9))

        rs = mt.random.RandomState(1)
        a = rs.rand(9, 6, chunk_size=(3, 6))
        U, s, V = mt.linalg.svd(a)

        # test tensor graph
        graph = DirectedGraph()
        U.build_graph(tiled=False, graph=graph)
        s.build_graph(tiled=False, graph=graph)
        new_graph = DirectedGraph.from_json(graph.to_json())
        self.assertEqual((len(new_graph)), 4)
        new_outputs = [
            n for n in new_graph if new_graph.count_predecessors(n) == 1
        ]
        self.assertEqual(len(new_outputs), 3)
        self.assertEqual(len(set([o.op for o in new_outputs])), 1)

        # test tensor graph, do some calculation
        graph = DirectedGraph()
        (U + 1).build_graph(tiled=False, graph=graph)
        (s + 1).build_graph(tiled=False, graph=graph)
        new_graph = DirectedGraph.from_json(graph.to_json())
        self.assertEqual((len(new_graph)), 6)
        new_outputs = [
            n for n in new_graph if new_graph.count_predecessors(n) == 1
        ]
        self.assertEqual(len(new_outputs), 5)
        self.assertEqual(len(set([o.op for o in new_outputs])), 3)

        a = rs.rand(20, 10, chunk_size=10)
        _, s, _ = mt.linalg.svd(a)
        del _
        graph = s.build_graph(tiled=False)
        self.assertEqual(len(graph), 4)
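
The final assertions exercise serialization of the coarse-grained tensor graph. A brief sketch of the same round trip, keeping all three SVD outputs alive and assuming DirectedGraph is importable from mars.graph:

import mars.tensor as mt
from mars.graph import DirectedGraph   # assumed import path

rs = mt.random.RandomState(1)
a = rs.rand(9, 6, chunk_size=(3, 6))
U, s, V = mt.linalg.svd(a)

graph = s.build_graph(tiled=False)      # untiled graph: the input plus the three SVD outputs
restored = DirectedGraph.from_json(graph.to_json())
print(len(graph), len(restored))        # 4 nodes before and after the round trip
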