def testBoolIndexingTiles(self):
    """Tile boolean-mask indexing and inspect the produced chunks.

    Two cases are exercised: a mask derived from the indexed tensor itself
    (three dimensions) and a two-dimensional mask applied to the same
    three-dimensional tensor.
    """
    # mask with as many dimensions as the indexed tensor
    source = ones((100, 200, 300), chunk_size=30)
    masked = source[source < 2].tiles()
    source = get_tiled(source)

    self.assertEqual(len(masked.chunks), 280)
    self.assertEqual(masked.chunks[0].index, (0, ))
    probe = masked.chunks[20]
    self.assertEqual(probe.index, (20, ))
    self.assertIs(probe.inputs[0], source.cix[(0, 2, 0)].data)
    self.assertIs(probe.inputs[1],
                  masked.op.indexes[0].cix[0, 2, 0].data)

    # mask covering only the two leading dimensions
    mask_base = ones((100, 200), chunk_size=30)
    partial = source[mask_base < 2].tiles()
    source = get_tiled(source)

    self.assertEqual(len(partial.chunks), 280)
    head_shape = partial.chunks[0].shape
    self.assertEqual(len(head_shape), 2)
    # the leading extent is asserted to be NaN; the trailing one is 30
    self.assertTrue(np.isnan(partial.chunks[0].shape[0]))
    self.assertEqual(partial.chunks[0].shape[1], 30)
    probe = partial.chunks[20]
    self.assertEqual(probe.inputs[0], source.cix[(0, 2, 0)].data)
    self.assertEqual(probe.inputs[1],
                     partial.op.indexes[0].cix[0, 2].data)
def testUnifyChunkAdd(self):
    """Add a two-chunk tensor to a one-chunk tensor and check chunk inputs."""
    lhs = ones(4, chunk_size=2)
    rhs = ones(1, chunk_size=1)
    total = (lhs + rhs).tiles()
    lhs, rhs = get_tiled(lhs), get_tiled(rhs)

    self.assertEqual(len(total.chunks), 2)
    # each output chunk takes its own lhs chunk and the single rhs chunk
    for position in range(2):
        out_chunk = total.chunks[position]
        self.assertEqual(out_chunk.inputs[0], lhs.chunks[position].data)
        self.assertEqual(out_chunk.inputs[1], rhs.chunks[0].data)
def testAddWithOut(self):
    """Exercise ``add(..., out=...)`` before and after tiling.

    Also checks that an ``out`` argument which is not a tensor, has the wrong
    shape, or has an incompatible dtype is rejected with the expected
    exception type.
    """
    target = ones((3, 4), chunk_size=2)
    addend = ones(4, chunk_size=2)

    returned = add(target, addend, out=target)

    self.assertIsInstance(target.op, TensorAdd)
    self.assertEqual(target.op.out.key, target.op.lhs.key)
    self.assertIs(returned, target)
    self.assertEqual(returned.shape, (3, 4))
    self.assertEqual(returned.op.lhs.extra_params.raw_chunk_size, 2)
    self.assertIs(returned.op.rhs, addend.data)
    self.assertNotEqual(returned.key, returned.op.lhs.key)

    returned.tiles()
    target = get_tiled(target)

    first_chunk_op = target.chunks[0].op
    self.assertIsInstance(first_chunk_op, TensorAdd)
    self.assertEqual(target.chunks[0].op.out.key,
                     target.chunks[0].op.lhs.key)

    # `out` must be a tensor
    with self.assertRaises(TypeError):
        add(target, addend, out=1)

    # `out` of shape (4,) cannot receive the (3, 4) result
    with self.assertRaises(ValueError):
        add(target, addend, out=addend)

    # integer `out` for a true division
    with self.assertRaises(TypeError):
        truediv(target, addend, out=target.astype('i8'))

    target = ones((3, 4), chunk_size=2, dtype=float)
    addend = ones(4, chunk_size=2, dtype=int)
    returned = add(addend, 1, out=target)

    self.assertEqual(returned.shape, (3, 4))
    self.assertEqual(returned.dtype, np.float64)
def testAggregateResult(self):
    """Check ``NDArrayIndexesHandler.aggregate_result`` for fancy indexes.

    The per-chunk results computed for the first index are aggregated twice:
    once with the context of that index and once with the context built for
    an index holding the same positions in a different order.
    """
    raw = np.random.RandomState(0).rand(10, 10)
    t = tensor(raw, chunk_size=6)
    stride = slice(None, None, 3)

    # test no reorder
    picks = np.array([3, 6, 7])
    indexes = [stride, picks]
    result = t[indexes].tiles()

    handler = NDArrayIndexesHandler()
    context = handler.handle(result.op, return_context=True)
    self.assertGreater(context.op.outputs[0].chunk_shape[-1], 1)

    raw_chunk_results = self.executor.execute_tensor(result)
    chunk_results = [
        (chunk.index, data)
        for chunk, data in zip(get_tiled(result).chunks, raw_chunk_results)
    ]
    expected = self.executor.execute_tensor(result, concat=True)[0]
    aggregated = handler.aggregate_result(context, chunk_results)
    np.testing.assert_array_equal(aggregated, expected)

    # test fancy index that requires reordering
    picks = np.array([6, 7, 3])
    indexes = [stride, picks]
    reordered = t[indexes].tiles()

    context = handler.handle(reordered.op, return_context=True)
    self.assertEqual(context.op.outputs[0].chunk_shape[-1], 1)

    aggregated = handler.aggregate_result(context, chunk_results)
    expected = self.executor.execute_tensor(reordered, concat=True)[0]
    np.testing.assert_array_equal(aggregated, expected)
def testToCPU(self):
    """Round-trip a DataFrame through ``to_gpu``/``to_cpu`` and compare metadata."""
    values = np.random.rand(10, 10)
    index = np.random.randint(-100, 100, size=(10, ))
    columns = [np.random.bytes(10) for _ in range(10)]
    data = pd.DataFrame(values, index=index, columns=columns)

    df = from_pandas_df(data)
    on_gpu = to_gpu(df)
    back = to_cpu(on_gpu)

    # tileable-level metadata
    self.assertEqual(df.index_value, back.index_value)
    self.assertEqual(df.columns_value, back.columns_value)
    self.assertFalse(back.op.gpu)
    pd.testing.assert_series_equal(df.dtypes, back.dtypes)

    back = back.tiles()
    df = get_tiled(df)

    # chunk-level metadata
    self.assertEqual(df.nsplits, back.nsplits)
    src_chunk = df.chunks[0]
    out_chunk = back.chunks[0]
    self.assertEqual(src_chunk.index_value, out_chunk.index_value)
    self.assertEqual(src_chunk.columns_value, out_chunk.columns_value)
    self.assertFalse(out_chunk.op.gpu)
    pd.testing.assert_series_equal(src_chunk.dtypes, out_chunk.dtypes)

    # converting something already on CPU returns the same object
    self.assertIs(back, to_cpu(back))
def testPermutation(self):
    """Check ``permutation`` for an integer, a 1-d tensor and a 2-d tensor.

    For each input the tileable shape and op type are verified, then the
    tiled chunk count, chunk shapes, upstream chunk wiring and seeds.
    Finally an invalid (string) argument must raise ``np.AxisError``.
    """
    # integer argument
    x = permutation(10)

    self.assertEqual(x.shape, (10, ))
    self.assertIsInstance(x.op, TensorPermutation)

    x = x.tiles()

    self.assertEqual(len(x.chunks), 1)
    self.assertIsInstance(x.chunks[0].op, TensorPermutation)

    # 1-d tensor argument
    arr = from_ndarray([1, 4, 9, 12, 15], chunk_size=2)
    x = permutation(arr)

    self.assertEqual(x.shape, (5, ))
    self.assertIsInstance(x.op, TensorPermutation)

    x = x.tiles()
    arr = get_tiled(arr)

    self.assertEqual(len(x.chunks), 3)
    self.assertTrue(np.isnan(x.chunks[0].shape[0]))
    self.assertIs(x.chunks[0].inputs[0].inputs[0].inputs[0],
                  arr.chunks[0].data)

    # 2-d tensor argument
    arr = rand(3, 3, chunk_size=2)
    x = permutation(arr)

    self.assertEqual(x.shape, (3, 3))
    self.assertIsInstance(x.op, TensorPermutation)

    x = x.tiles()
    arr = get_tiled(arr)

    self.assertEqual(len(x.chunks), 4)
    self.assertTrue(np.isnan(x.chunks[0].shape[0]))
    self.assertEqual(x.chunks[0].shape[1], 2)
    self.assertIs(x.cix[0, 0].inputs[0].inputs[0].inputs[0],
                  arr.cix[0, 0].data)
    self.assertIs(x.cix[0, 0].inputs[0].inputs[1].inputs[0],
                  arr.cix[1, 0].data)
    self.assertEqual(x.cix[0, 0].op.seed, x.cix[0, 1].op.seed)
    self.assertEqual(x.cix[0, 0].inputs[0].inputs[0].inputs[0].op.seed,
                     x.cix[1, 0].inputs[0].inputs[0].inputs[0].op.seed)

    # Call permutation directly inside the context manager.  The previous
    # form passed its result to a second self.assertRaises(), which never
    # ran as an assertion and would have masked a missing exception with an
    # unrelated TypeError.
    with self.assertRaises(np.AxisError):
        permutation('abc')
def testReExecuteSame(self):
    """Run the same (or same-keyed) tileables repeatedly and compare results."""
    data = np.random.random((5, 9))

    # test run the same tensor
    reused = mt.tensor(data.copy(), chunk_size=3) + 1
    first_run = reused.to_numpy()
    np.testing.assert_array_equal(first_run, data + 1)

    second_run = reused.to_numpy()
    np.testing.assert_array_equal(first_run, second_run)

    # test run the same tensor with single chunk
    reused = mt.tensor(data.copy())
    first_run = reused.to_numpy()
    np.testing.assert_array_equal(first_run, data)

    second_run = reused.to_numpy()
    np.testing.assert_array_equal(first_run, second_run)

    # modify result
    session = Session.default_or_local()
    local_executor = session._sess._executor
    local_executor.chunk_result[get_tiled(reused).chunks[0].key] = data + 2

    patched_run = reused.to_numpy()
    np.testing.assert_array_equal(patched_run, data + 2)

    # test run same key tensor
    ones_a = mt.ones((10, 10), chunk_size=3)
    first_run = ones_a.to_numpy()
    del ones_a

    ones_b = mt.ones((10, 10), chunk_size=3)
    second_run = ones_b.to_numpy()
    np.testing.assert_array_equal(first_run, second_run)

    # test copy, make sure it will not let the execution cache missed
    frame = md.DataFrame(mt.ones((10, 3), chunk_size=5))
    executed = [False]

    def add_one(x):
        # once the flag is flipped, any further call means re-execution
        if executed[0]:  # pragma: no cover
            raise ValueError('executed before')
        return x + 1

    applied = frame.apply(add_one)
    pd.testing.assert_frame_equal(applied.to_pandas(),
                                  pd.DataFrame(np.ones((10, 3)) + 1))

    executed[0] = True

    copied = applied.copy()
    doubled = copied * 2
    pd.testing.assert_frame_equal(doubled.to_pandas(),
                                  pd.DataFrame(np.ones((10, 3)) * 4))
def testIndicesIndexingTiles(self):
    """Tile integer indexing along the first axis and check the chunk hit."""
    source = ones((10, 20, 30), chunk_size=(2, 20, 30))

    # (global index, expected source chunk row, expected index in chunk)
    cases = [(3, 1, 1), (4, 2, 0)]
    for position, chunk_row, local_index in cases:
        picked = source[position].tiles()
        source = get_tiled(source)

        self.assertEqual(len(picked.chunks), 1)
        only_chunk = picked.chunks[0]
        self.assertIs(only_chunk.inputs[0],
                      source.cix[chunk_row, 0, 0].data)
        self.assertEqual(only_chunk.op.indexes[0], local_index)
def testToGPU(self):
    """Check metadata and the gpu flag after ``to_gpu`` on a DataFrame and a Series."""
    # test dataframe
    values = np.random.rand(10, 10)
    index = np.random.randint(-100, 100, size=(10, ))
    columns = [np.random.bytes(10) for _ in range(10)]
    data = pd.DataFrame(values, index=index, columns=columns)

    df = from_pandas_df(data)
    gpu_df = to_gpu(df)

    self.assertEqual(df.index_value, gpu_df.index_value)
    self.assertEqual(df.columns_value, gpu_df.columns_value)
    self.assertTrue(gpu_df.op.gpu)
    pd.testing.assert_series_equal(df.dtypes, gpu_df.dtypes)

    gpu_df = gpu_df.tiles()
    df = get_tiled(df)

    self.assertEqual(df.nsplits, gpu_df.nsplits)
    src_chunk = df.chunks[0]
    gpu_chunk = gpu_df.chunks[0]
    self.assertEqual(src_chunk.index_value, gpu_chunk.index_value)
    self.assertEqual(src_chunk.columns_value, gpu_chunk.columns_value)
    self.assertTrue(gpu_chunk.op.gpu)
    pd.testing.assert_series_equal(src_chunk.dtypes, gpu_chunk.dtypes)

    # converting something already on GPU returns the same object
    self.assertIs(gpu_df, to_gpu(gpu_df))

    # test series
    series_data = data.iloc[:, 0]
    series = from_pandas_series(series_data)
    gpu_series = to_gpu(series)

    self.assertEqual(series.index_value, gpu_series.index_value)
    self.assertTrue(gpu_series.op.gpu)

    gpu_series = gpu_series.tiles()
    series = get_tiled(series)

    self.assertEqual(series.nsplits, gpu_series.nsplits)
    self.assertEqual(series.chunks[0].index_value,
                     gpu_series.chunks[0].index_value)
    self.assertTrue(gpu_series.chunks[0].op.gpu)

    self.assertIs(gpu_series, to_gpu(gpu_series))
def testSendTargets(self):
    """Execute a small graph and request its result be sent to a worker.

    A ``MockSenderActor`` is registered under ``w:mock_sender``; once the
    graph finishes, the value stored for the result chunk is compared with
    the expected sum.
    """
    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)

        import mars.tensor as mt
        arr = mt.ones((4, ), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)
        result_tensor = get_tiled(result_tensor)
        result_key = result_tensor.chunks[0].key

        pool.create_actor(MockSenderActor,
                          [mock_data + np.ones((4, ))], 'out',
                          uid='w:mock_sender')

        with self.run_actor_test(pool) as test_actor:
            def _validate(*_):
                # compare what the shared store holds with the expected sum
                stored = test_actor.shared_store.get(session_id, result_key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(
                ExecutionActor.default_uid())

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                {'chunks': [result_key]}, None, _tell=True)
            execution_ref.send_data_to_workers(
                session_id, graph_key, {result_key: (pool_address, )},
                _tell=True)

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            (finished
             .then(_validate)
             .then(lambda *_: test_actor.set_result(None))
             .catch(lambda *exc: test_actor.set_result(exc, False)))

        self.get_result()
def testMultiOutputsOp(self):
    """Run an op consuming one output of QR and check the cached chunk count."""
    sess = new_session()

    raw = np.random.RandomState(0).rand(20, 5)
    source = mt.tensor(raw, chunk_size=5)
    abs_q = mt.abs(mt.linalg.qr(source)[0])

    fetched = sess.run(abs_q)
    reference = np.abs(np.linalg.qr(raw)[0])
    np.testing.assert_almost_equal(fetched, reference)

    # the executor should hold exactly as many entries as abs_q has chunks
    cached = sess._sess.executor.chunk_result
    self.assertEqual(len(cached), len(get_tiled(abs_q).chunks))
def testReadZarrExecution(self):
    """Read one zarr array through each accepted ``fromzarr`` input form."""
    test_array = np.random.RandomState(0).rand(20, 10)
    group_name = 'test_group'
    dataset_name = 'test_dataset'

    def assert_reads_back(lazy):
        # executed content equals the source and spans several chunks
        fetched = self.executor.execute_tensor(lazy, concat=True)[0]
        np.testing.assert_array_equal(fetched, test_array)
        self.assertGreater(len(get_tiled(lazy).chunks), 1)

    # unsupported input type
    with self.assertRaises(TypeError):
        fromzarr(object())

    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, f'test_read_{int(time.time())}.zarr')

        # array object created through a group
        group = zarr.group(path)
        arr = group.array(group_name + '/' + dataset_name, test_array,
                          chunks=(7, 4))
        r = fromzarr(arr)
        assert_reads_back(r)

        # array object re-opened from its path
        arr = zarr.open_array(f'{path}/{group_name}/{dataset_name}')
        r = fromzarr(arr)
        assert_reads_back(r)

        # store path plus group/dataset keywords
        r = fromzarr(path, group=group_name, dataset=dataset_name)
        assert_reads_back(r)

        # full path to the dataset
        r = fromzarr(path + '/' + group_name + '/' + dataset_name)
        assert_reads_back(r)
def testFancyIndexingNumpyExecution(self):
    """Execute several fancy-index expressions and compare them with NumPy."""
    def run(lazy):
        return self.executor.execute_tensor(lazy, concat=True)[0]

    # test fancy index of type numpy ndarray
    raw = np.random.random((11, 8, 12, 14))
    arr = tensor(raw, chunk_size=(2, 5, 7, 8))

    # single list on the first axis
    rows = [9, 10, 3, 1, 8, 10]
    np.testing.assert_array_equal(run(arr[rows]), raw[rows])

    # slice, ellipsis and a permutation on the last axis
    order = np.random.permutation(8)
    np.testing.assert_array_equal(run(arr[:2, ..., order]),
                                  raw[:2, ..., order])

    # ellipsis followed by a list and a slice
    picks = [1, 3, 9, 10]
    np.testing.assert_array_equal(run(arr[..., picks, :5]),
                                  raw[..., picks, :5])

    # two lists separated by a full slice
    first = [8, 10, 3, 1, 9, 10]
    second = [1, 3, 9, 10, 2, 7]
    np.testing.assert_array_equal(run(arr[first, :, second]),
                                  raw[first, :, second])

    first = [1, 3, 5, 7, 9, 10]
    second = [1, 9, 9, 10, 2, 7]
    ordered = arr[first, :, second]
    np.testing.assert_array_equal(run(ordered), raw[first, :, second])
    # fancy index is ordered, no concat required
    self.assertGreater(len(get_tiled(ordered).nsplits[0]), 1)

    # two-dimensional index lists
    first = [[8, 10, 3], [1, 9, 10]]
    second = [[1, 3, 9], [10, 2, 7]]
    np.testing.assert_array_equal(run(arr[first, :, second]),
                                  raw[first, :, second])

    # integer combined with index lists of different shapes
    first = [[1, 3], [3, 7], [7, 7]]
    second = [1, 9]
    np.testing.assert_array_equal(run(arr[0, first, :, second]),
                                  raw[0, first, :, second])
def testMergeOneChunk(self):
    """Tile merges where at least one side consists of a single chunk."""
    left_raw = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
                             'value': [1, 2, 3, 5]})
    right_raw = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
                              'value': [5, 6, 7, 8]})

    # all have one chunk
    left = from_pandas(left_raw)
    right = from_pandas(right_raw)
    tiled = left.merge(right, left_on='lkey', right_on='rkey').tiles()

    self.assertEqual(tiled.chunk_shape, (1, 1))
    only = tiled.chunks[0]
    self.assertEqual(only.inputs[0].key, get_tiled(left).chunks[0].key)
    self.assertEqual(only.inputs[1].key, get_tiled(right).chunks[0].key)

    # left has one chunk
    left = from_pandas(left_raw)
    right = from_pandas(right_raw, chunk_size=2)
    tiled = left.merge(right, left_on='lkey', right_on='rkey').tiles()

    self.assertEqual(tiled.chunk_shape, (2, 1))
    for position in range(2):
        merged_chunk = tiled.chunks[position]
        self.assertEqual(merged_chunk.inputs[0].key,
                         get_tiled(left).chunks[0].key)
        self.assertEqual(merged_chunk.inputs[1].key,
                         get_tiled(right).chunks[position].key)

    # right has one chunk
    left = from_pandas(left_raw, chunk_size=2)
    right = from_pandas(right_raw)
    tiled = left.merge(right, left_on='lkey', right_on='rkey').tiles()

    self.assertEqual(tiled.chunk_shape, (2, 1))
    for position in range(2):
        merged_chunk = tiled.chunks[position]
        self.assertEqual(merged_chunk.inputs[0].key,
                         get_tiled(left).chunks[position].key)
        self.assertEqual(merged_chunk.inputs[1].key,
                         get_tiled(right).chunks[0].key)
def testUnravelIndex(self):
    """Tile both outputs of ``unravel_index`` and reject an invalid ``order``."""
    flat = tensor([22, 41, 37], chunk_size=1)
    outputs = unravel_index(flat, (7, 6))

    self.assertEqual(len(outputs), 2)

    # tile every output, then look the tiled versions up
    for out in outputs:
        out.tiles()
    outputs = [get_tiled(out) for out in outputs]

    self.assertEqual(len(outputs[0].chunks), 3)
    self.assertEqual(len(outputs[1].chunks), 3)

    with self.assertRaises(TypeError):
        unravel_index([22, 41, 37], (7, 6), order='B')
def testBetaInc(self):
    """Check ``betainc`` metadata and tiling, with and without ``out=``.

    Shape and dtype are compared against ``scipy_betainc`` on the raw data;
    after tiling, nsplits must match the input and every chunk must carry a
    ``TensorBetaInc`` op whose index and shape equal those of its first
    input.
    """
    raw1 = np.random.rand(4, 3, 2)
    raw2 = np.random.rand(4, 3, 2)
    raw3 = np.random.rand(4, 3, 2)
    a = tensor(raw1, chunk_size=3)
    b = tensor(raw2, chunk_size=3)
    c = tensor(raw3, chunk_size=3)

    # result returned as a new tensor
    r = betainc(a, b, c)
    expect = scipy_betainc(raw1, raw2, raw3)

    self.assertEqual(r.shape, raw1.shape)
    self.assertEqual(r.dtype, expect.dtype)

    r = r.tiles()
    tiled_a = get_tiled(a)

    self.assertEqual(r.nsplits, tiled_a.nsplits)
    for chunk in r.chunks:
        self.assertIsInstance(chunk.op, TensorBetaInc)
        self.assertEqual(chunk.index, chunk.inputs[0].index)
        self.assertEqual(chunk.shape, chunk.inputs[0].shape)

    # result written through `out=a`
    betainc(a, b, c, out=a)
    expect = scipy_betainc(raw1, raw2, raw3)

    self.assertEqual(a.shape, raw1.shape)
    self.assertEqual(a.dtype, expect.dtype)

    tiled_a = a.tiles()
    b = get_tiled(b)

    self.assertEqual(tiled_a.nsplits, b.nsplits)
    # Inspect the chunks of the tensor produced via `out=`.  Iterating
    # `r.chunks` here would only repeat the checks of the first half and
    # leave the `out=` path untested.
    for chunk in tiled_a.chunks:
        self.assertIsInstance(chunk.op, TensorBetaInc)
        self.assertEqual(chunk.index, chunk.inputs[0].index)
        self.assertEqual(chunk.shape, chunk.inputs[0].shape)
def testIterativeTiling(self):
    """Run graphs that slice or post-process an in-place sorted tensor."""
    sess = new_session()
    rng = np.random.RandomState(0)

    # slice of a sorted tensor
    values = rng.rand(100)
    base = mt.tensor(values, chunk_size=10)
    base.sort()
    stage = base[:5]

    fetched = sess.run(stage)
    np.testing.assert_array_equal(fetched, np.sort(values)[:5])

    executor = sess._sess.executor
    self.assertEqual(len(executor.chunk_result), 1)
    executor.chunk_result.clear()

    # sort, concatenate with another tensor, sort again, then slice
    values1 = rng.rand(20)
    values2 = rng.rand(20)
    base = mt.tensor(values1, chunk_size=10)
    base.sort()
    extra = mt.tensor(values2, chunk_size=15) + 1
    stage = mt.concatenate([base[:10], extra])
    stage.sort()
    head = stage[:5]

    fetched = sess.run(head)
    wanted = np.sort(
        np.concatenate([np.sort(values1)[:10], values2 + 1]))[:5]
    np.testing.assert_array_equal(fetched, wanted)
    self.assertEqual(len(executor.chunk_result),
                     len(get_tiled(head).chunks))

    # fetch an intermediate and its slice together
    values = rng.rand(100)
    base = mt.tensor(values, chunk_size=10)
    base.sort()
    extra = base + 1
    stage = extra[:5]

    fetched = sess.run([extra, stage])
    wanted = np.sort(values + 1)[:5]
    np.testing.assert_array_equal(fetched[1], wanted)

    # histogram over a sorted integer tensor
    values = rng.randint(100, size=(100,))
    base = mt.tensor(values, chunk_size=23)
    base.sort()
    extra = mt.histogram(base, bins='stone')

    outcome = sess.run(extra)
    wanted = np.histogram(np.sort(values), bins='stone')
    np.testing.assert_almost_equal(outcome[0], wanted[0])
    np.testing.assert_almost_equal(outcome[1], wanted[1])
def testMixedIndexingTiles(self):
    """Tile an index mixing slice, integer, ellipsis, newaxis and a bool tensor."""
    source = ones((100, 200, 300, 400), chunk_size=24)
    mask = ones(400, chunk_size=24) < 2

    picked = source[10:90:3, 5, ..., None, mask].tiles()
    mask = get_tiled(mask)

    # the last extent is asserted to be NaN, the others are concrete
    self.assertEqual(picked.shape[:-1], (27, 300, 1))
    self.assertTrue(np.isnan(picked.shape[-1]))
    self.assertEqual(picked.chunk_shape, (4, 13, 1, 17))

    expected_indexes = [
        slice(10, 24, 3),
        5,
        slice(None),
        None,
        mask.cix[(0,)].data,
    ]
    self.assertEqual(picked.chunks[0].op.indexes, expected_indexes)
def testGraphDeviceAssigner(self):
    """Assign two devices to a tiled graph and compare chunk devices per row."""
    import mars.tensor as mt

    source = mt.random.rand(10, 10, chunk_size=5, gpu=True)
    row_sums = source.sum(axis=1)
    graph = row_sums.build_graph(tiled=True, fuse_enabled=False)

    initial_ops = [node.op for node in graph.iter_indep()]
    GraphDeviceAssigner(graph, initial_ops, devices=[0, 1]).assign()

    source = get_tiled(source)
    top_left, top_right = source.cix[0, 0], source.cix[0, 1]
    bottom_left, bottom_right = source.cix[1, 0], source.cix[1, 1]

    # same device within a row, different devices across the two rows
    self.assertEqual(top_left.device, top_right.device)
    self.assertEqual(bottom_left.device, bottom_right.device)
    self.assertNotEqual(top_left.device, bottom_left.device)
def testFetch(self):
    """Overwrite a cached chunk in eager mode and fetch a same-source tensor."""
    from mars.session import Session

    with option_context({'eager_mode': True}):
        eager_ones = mt.ones((10, 5), chunk_size=4)
        np.testing.assert_array_equal(eager_ones, np.ones((10, 5)))

        # replace the stored result of the first chunk with twos
        session = Session.default_or_local()
        local_executor = session._sess._executor
        first_key = get_tiled(eager_ones).chunks[0].key
        local_executor.chunk_result[first_key] = np.ones((4, 4)) * 2

        shifted = mt.ones((10, 5), chunk_size=4) - 1
        fetched = shifted.fetch()

        np.testing.assert_array_equal(fetched[:4, :4], np.ones((4, 4)))
        np.testing.assert_array_equal(fetched[8:, :4], np.zeros((2, 4)))
def testExecuteBothExecutedAndNot(self):
    """Run an unexecuted and an already-executed tensor in a single call."""
    data = np.random.random((5, 9))

    pending = mt.tensor(data, chunk_size=4) * 2
    done = mt.tensor(data) + 1

    np.testing.assert_array_equal(done.to_numpy(), data + 1)

    # modify result
    session = Session.default_or_local()
    local_executor = session._sess._executor
    done_key = get_tiled(done).chunks[0].key
    local_executor.chunk_result[done_key] = data + 2

    results = session.run(pending, done)
    np.testing.assert_array_equal(results[0], data * 2)
    # the second result reflects the overwritten cache entry
    np.testing.assert_array_equal(results[1], data + 2)
def testRechunk(self):
    """Rechunk a (12, 9) tensor from chunk size 4 to 3 and inspect the slices."""
    original = ones((12, 9), chunk_size=4)
    rechunked = original.rechunk(3).tiles()

    self.assertEqual(len(rechunked.chunks), 12)
    self.assertEqual(rechunked.chunks[0].inputs[0],
                     get_tiled(original).chunks[0].data)

    # second chunk is asserted to be built from two slice inputs
    second = rechunked.chunks[1]
    self.assertEqual(len(second.inputs), 2)
    self.assertEqual(second.inputs[0].op.slices,
                     [slice(None, 3, None), slice(3, None, None)])
    self.assertEqual(second.inputs[1].op.slices,
                     [slice(None, 3, None), slice(None, 2, None)])

    # so is the last chunk
    last = rechunked.chunks[-1]
    self.assertEqual(len(last.inputs), 2)
    self.assertEqual(last.inputs[0].op.slices,
                     [slice(1, None, None), slice(2, None, None)])
    self.assertEqual(last.inputs[1].op.slices,
                     [slice(1, None, None), slice(None, None, None)])
def _check_nsplits(self, tileable):
    """Validate ``nsplits`` of the tiled form of ``tileable``.

    Checks, in order: the per-axis number of splits against ``chunk_shape``;
    the per-axis sum of splits against ``shape`` (through
    ``self.assert_shape_consistent``); that every chunk can be found via
    ``cix`` under its own index; and that every chunk's shape matches the
    corresponding combination of splits, with two NaN extents treated as
    equal.  A shape recorded in ``self._raw_chunk_shapes`` for the chunk key
    takes precedence over ``chunk.shape`` when it is truthy.

    Raises ``AssertionError`` on the first inconsistency.
    """
    from mars.core import get_tiled

    mismatch_fmt = \
        'Operand %r: Shape in nsplits %r does not meet shape in chunk %r'

    tiled = get_tiled(tileable)
    # empty nsplits with exactly one chunk: nothing to verify
    if tiled.nsplits == () and len(tiled.chunks) == 1:
        return

    # number of splits per axis vs. chunk_shape
    nsplit_chunk_shape = tuple(len(axis_splits)
                               for axis_splits in tiled.nsplits)
    if nsplit_chunk_shape != tiled.chunk_shape:
        raise AssertionError(
            'Operand %r: shape of nsplits %r not consistent with chunk shape %r'
            % (tiled.op, nsplit_chunk_shape, tiled.chunk_shape)) from None

    # sum of splits per axis vs. shape
    nsplit_shape = tuple(np.sum(axis_splits)
                         for axis_splits in tiled.nsplits)
    try:
        self.assert_shape_consistent(nsplit_shape, tiled.shape)
    except AssertionError:
        raise AssertionError(
            'Operand %r: shape computed from nsplits %r -> %r not consistent with real shape %r'
            % (tiled.op, tiled.nsplits, nsplit_shape, tiled.shape)) from None

    # each chunk must be reachable through cix by its own index
    for chunk in tiled.chunks:
        try:
            located = tiled.cix[chunk.index]
        except ValueError as ex:
            raise AssertionError(
                'Operand %r: Malformed index %r, nsplits is %r. Raw error is %r'
                % (chunk.op, chunk.index, tiled.nsplits, ex)) from None

        if located is not chunk:
            raise AssertionError(
                'Operand %r: Cannot spot chunk via index %r, nsplits is %r'
                % (chunk.op, chunk.index, tiled.nsplits))

    # chunk shapes vs. the product of the splits, in chunk order
    for cid, shape in enumerate(itertools.product(*tiled.nsplits)):
        chunk_shape = self._raw_chunk_shapes.get(
            tiled.chunks[cid].key) or tiled.chunks[cid].shape
        if len(shape) != len(chunk_shape):
            raise AssertionError(
                mismatch_fmt % (tiled.chunks[cid].op, shape, chunk_shape))
        for split_extent, chunk_extent in zip(shape, chunk_shape):
            both_nan = np.isnan(split_extent) and np.isnan(chunk_extent)
            if (not both_nan) and split_extent != chunk_extent:
                raise AssertionError(
                    mismatch_fmt % (tiled.chunks[cid].op, shape, chunk_shape))
def testElf(self):
    """Check ``erf`` metadata against ``scipy_erf`` and its tiled chunks."""
    raw = np.random.rand(10, 8, 5)
    source = tensor(raw, chunk_size=3)

    result = erf(source)
    reference = scipy_erf(raw)

    self.assertEqual(result.shape, raw.shape)
    self.assertEqual(result.dtype, reference.dtype)

    result = result.tiles()
    source = get_tiled(source)

    self.assertEqual(result.nsplits, source.nsplits)
    # every chunk mirrors the index and shape of its input chunk
    for out_chunk in result.chunks:
        in_chunk = out_chunk.inputs[0]
        self.assertIsInstance(out_chunk.op, TensorErf)
        self.assertEqual(out_chunk.index, in_chunk.index)
        self.assertEqual(out_chunk.shape, in_chunk.shape)
def testSliceTiles(self):
    """Tile a three-axis slice and check the per-chunk inputs and local slices."""
    source = ones((100, 200, 300), chunk_size=30)
    sliced = source[10:40, 199:, -30:303].tiles()
    source = get_tiled(source)

    self.assertEqual(sliced.chunk_shape, (2, 1, 1))

    # expected slice on the first axis for each of the two output chunks
    first_axis_slices = [slice(10, 30, 1), slice(0, 10, 1)]
    for position, first_axis in enumerate(first_axis_slices):
        out_chunk = sliced.chunks[position]
        self.assertEqual(out_chunk.inputs[0],
                         source.cix[position, -1, -1].data)
        self.assertEqual(out_chunk.op.indexes,
                         [first_axis, slice(19, 20, 1), slice(None)])
        self.assertEqual(out_chunk.index, (position, 0, 0))
def testTensorExecuteNotFetch(self):
    """``execute()`` returns the tensor itself; later reads use cached chunks."""
    data = np.random.random((5, 9))
    session = Session.default_or_local()

    doubled = mt.tensor(data, chunk_size=2) * 2

    # fetching before execution is an error
    with self.assertRaises(ValueError):
        session.fetch(doubled)

    self.assertIs(doubled.execute(), doubled)

    # modify result
    local_executor = session._sess._executor
    first_key = get_tiled(doubled).chunks[0].key
    local_executor.chunk_result[first_key] = data[:2, :2] * 3

    wanted = data * 2
    wanted[:2, :2] = data[:2, :2] * 3

    np.testing.assert_array_equal(doubled.to_numpy(), wanted)
def run_test(self, worker, calc_device=None):
    """Submit a tiled ``dot`` graph to the execution actor at ``worker``.

    ``calc_device`` is forwarded to ``execute_graph``; the random inputs are
    created with ``gpu=True`` only when it equals ``'cuda'``.  When the
    returned promise resolves, ``self._replied`` is set to ``True``.
    """
    import mars.tensor as mt
    from mars.worker import ExecutionActor

    session_id = str(uuid.uuid4())
    gpu = calc_device in ('cuda', )

    lhs = mt.random.rand(100, 50, chunk_size=30, gpu=gpu)
    rhs = mt.random.rand(50, 200, chunk_size=30, gpu=gpu)
    result = lhs.dot(rhs)
    graph = result.build_graph(tiled=True)
    result = get_tiled(result)

    executor_ref = self.promise_ref(ExecutionActor.default_uid(),
                                    address=worker)
    io_meta = {'chunks': [chunk.key for chunk in result.chunks]}
    graph_key = str(id(graph))

    def _mark_replied(*_):
        setattr(self, '_replied', True)

    submitted = executor_ref.execute_graph(
        session_id, graph_key, serialize_graph(graph), io_meta, None,
        calc_device=calc_device, _promise=True)
    submitted.then(_mark_replied)
def testDataFrameExecuteNotFetch(self):
    """``execute()`` returns the DataFrame itself; later reads use cached chunks.

    Also checks that filtered selections can be fetched in batches, and that
    a chunk result overwritten in the executor shows up in ``to_pandas()``
    with and without batched fetching.
    """
    data1 = pd.DataFrame(np.random.random((5, 4)), columns=list('abcd'))
    sess = Session.default_or_local()

    df1 = md.DataFrame(data1, chunk_size=2)

    # fetching before execution is an error
    with self.assertRaises(ValueError):
        sess.fetch(df1)

    self.assertIs(df1.execute(), df1)

    # values are drawn from [0, 1), so `> 1` selects nothing
    self.assertEqual(
        len(df1[df1['a'] > 1].to_pandas(fetch_kwargs={'batch_size': 2})), 0)
    self.assertEqual(
        len(df1[df1['a'] > 1]['a'].to_pandas(fetch_kwargs={'batch_size': 2})), 0)

    # modify result
    executor = sess._sess._executor
    executor.chunk_result[get_tiled(df1).chunks[0].key] = data1.iloc[:2, :2] * 3

    # Build the expectation on a copy so the frame `df1` was created from is
    # not mutated in place while it may still be referenced.
    expected = data1.copy()
    expected.iloc[:2, :2] = data1.iloc[:2, :2] * 3

    pd.testing.assert_frame_equal(df1.to_pandas(), expected)
    pd.testing.assert_frame_equal(
        df1.to_pandas(fetch_kwargs={'batch_size': 2}), expected)
def run_simple_calc(self, session_id):
    """Submit a one-chunk ``ones + 1`` graph and record how it finishes.

    Stores the session id, the result chunk key and the graph key on
    ``self``; on completion appends ``(True,)`` to ``self._results``, on
    failure ``(False, exc)``.
    """
    self._session_id = session_id

    import mars.tensor as mt
    arr = mt.ones((4, ), chunk_size=4) + 1
    graph = arr.build_graph(fuse_enabled=False, tiled=True)
    arr = get_tiled(arr)

    self._array_key = arr.chunks[0].key

    graph_key = str(uuid.uuid4())
    self._graph_key = graph_key

    def _on_success(*_):
        self._results.append((True,))

    def _on_failure(*exc):
        self._results.append((False, exc))

    execution_ref = self.promise_ref(ExecutionActor.default_uid())
    execution_ref.execute_graph(
        session_id, graph_key, serialize_graph(graph),
        {'chunks': [arr.chunks[0].key]}, None, _tell=True)

    finished = execution_ref.add_finish_callback(
        session_id, graph_key, _promise=True)
    finished.then(_on_success).catch(_on_failure)
def testEstimateGraphFinishTime(self):
    """Seed speed statistics, then check that finish-time estimates are published."""
    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())

    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)

        status_ref = pool.actor_ref(StatusActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((10, 8), chunk_size=10)
        graph = arr.build_graph(fuse_enabled=False, tiled=True)
        arr = get_tiled(arr)

        graph_key = str(uuid.uuid4())

        # feed one more sample than options.optimize.min_stats_count
        stat_names = (
            'calc_speed.' + type(arr.chunks[0].op).__name__,
            'disk_read_speed',
            'disk_write_speed',
            'net_transfer_speed',
        )
        for _ in range(options.optimize.min_stats_count + 1):
            for stat_name in stat_names:
                status_ref.update_mean_stats(stat_name, 10)

        execution_ref.execute_graph(
            session_id, graph_key, serialize_graph(graph),
            {'chunks': [arr.chunks[0].key]}, None)
        execution_ref.estimate_graph_finish_time(session_id, graph_key)

        stats_dict = status_ref.get_stats(
            ['min_est_finish_time', 'max_est_finish_time'])
        self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
        self.assertIsNotNone(stats_dict.get('max_est_finish_time'))