def testRemoteFunctionInLocalCluster(self):
    """Run functions spawned with ``mr.spawn`` through sessions of a cluster
    created by ``new_cluster`` and check the values they return.

    Covered cases: plain Python arguments, spawned objects used as arguments,
    a function that inspects ``Session.default``, tileable arguments, a named
    tensor, a DataFrame argument the function fetches from, and a tensor
    produced by boolean indexing.
    """
    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M', modules=[__name__], web=True) as cluster:
        session = cluster.session

        def f(x):
            return x + 1

        def g(x, y):
            return x * y

        # spawned objects are passed as the arguments of another spawn:
        # (3 + 1) * (4 + 1) == 20
        a = mr.spawn(f, 3)
        b = mr.spawn(f, 4)
        c = mr.spawn(g, (a, b))

        r = session.run(c, timeout=_exec_timeout)
        self.assertEqual(r, 20)

        # a spawn nested directly inside another spawn: (2 + 1) + 1 == 4
        e = mr.spawn(f, mr.spawn(f, 2))

        r = session.run(e, timeout=_exec_timeout)
        self.assertEqual(r, 4)

        session2 = new_session(cluster.endpoint)
        expect_session_id = session2.session_id

        def f2():
            # checks that the default session seen inside the function is a
            # ClusterSession carrying session2's id
            session = Session.default
            assert isinstance(session._sess, ClusterSession)
            assert session._sess.session_id == expect_session_id

            t = mt.ones((3, 2))
            return t.sum().to_numpy()

        # the default session keeps its session_id across a cloudpickle round trip
        self.assertEqual(cloudpickle.loads(cloudpickle.dumps(Session.default)).session_id,
                         session.session_id)
        self.assertIsInstance(serialize_function(f2), bytes)

        d = mr.spawn(f2, retry_when_fail=False)

        r = session2.run(d, timeout=_exec_timeout)
        self.assertEqual(r, 6)

        # test input tileable
        # NOTE: this rebinds ``f``; the earlier one-argument ``f`` is no longer used
        def f(t, x):
            return (t * x).sum().to_numpy()

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        t1 = mt.tensor(raw, chunk_size=3)
        t2 = t1.sum(axis=0)
        s = mr.spawn(f, args=(t2, 3), retry_when_fail=False)

        r = session.run(s, timeout=_exec_timeout)
        expected = (raw.sum(axis=0) * 3).sum()
        self.assertAlmostEqual(r, expected)

        # test named tileable
        session3 = new_session(cluster.endpoint)
        t = mt.ones((10, 10), chunk_size=3)
        session3.run(t, name='t_name')

        def f3():
            import mars.tensor as mt

            # looks the tensor up by the name it was run under above
            s = mt.named_tensor(name='t_name')
            return (s + 1).to_numpy()

        d = mr.spawn(f3, retry_when_fail=False)
        r = session3.run(d, timeout=_exec_timeout)
        np.testing.assert_array_equal(r, np.ones((10, 10)) + 1)

        # test tileable that executed
        session4 = new_session(cluster.endpoint)
        df1 = md.DataFrame(raw, chunk_size=3)
        # raw comes from rand(), so every value is below 1.5 and no row is dropped
        df1 = df1[df1.iloc[:, 0] < 1.5]

        def f4(input_df):
            # fetch() is called on the argument inside the spawned function
            bonus = input_df.iloc[:, 0].fetch().sum()
            return input_df.sum().to_pandas() + bonus

        d = mr.spawn(f4, args=(df1,), retry_when_fail=False)
        r = session4.run(d, timeout=_exec_timeout)
        expected = pd.DataFrame(raw).sum() + raw[:, 0].sum()
        pd.testing.assert_series_equal(r, expected)

        # test tileable has unknown shape
        session5 = new_session(cluster.endpoint)

        def f5(t, x):
            # the shape must contain no NaN by the time the function runs
            assert all(not np.isnan(s) for s in t.shape)
            return (t * x).sum().to_numpy()

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        t1 = mt.tensor(raw, chunk_size=3)
        t2 = t1[t1 < 0.5]
        s = mr.spawn(f5, args=(t2, 3))
        result = session5.run(s, timeout=_exec_timeout)
        expected = (raw[raw < 0.5] * 3).sum()
        self.assertAlmostEqual(result, expected)
def testSplitPreparation(self, *_):
    """Prepare the graph of a sum over the two parts of a split tensor."""
    source = mt.ones(12, chunk_size=4)
    pieces = mt.split(source, 2)
    combined = pieces[0] + pieces[1]

    # Entering and leaving the context is the whole test; the body is
    # intentionally empty.
    with self.prepare_graph_in_pool(combined, clean_io_meta=False):
        pass
def testShufflePreparation(self, *_):
    """Prepare the graph of a reshape flagged with ``_reshape_with_shuffle``."""
    source = mt.ones((31, 27), chunk_size=10)
    reshaped = source.reshape(27, 31)
    reshaped.op.extra_params['_reshape_with_shuffle'] = True

    # Entering and leaving the context is the whole test; the body is
    # intentionally empty.
    with self.prepare_graph_in_pool(reshaped, compose=False):
        pass
def testMainTensorWithoutEtcd(self):
    """Submit several tensor graphs to a session and compare fetched results
    with the equivalent numpy computation.
    """
    self.start_processes()

    session_id = uuid.uuid1()
    actor_client = new_client()
    session_ref = actor_client.actor_ref(
        self.session_manager_ref.create_session(session_id))

    def run_to_success(tileable):
        """Submit ``tileable``'s graph, require SUCCEEDED, return the loaded result."""
        tileable_graph = tileable.build_graph()
        target_keys = [tileable.key]
        key = uuid.uuid1()
        session_ref.submit_tileable_graph(
            json.dumps(tileable_graph.to_json()), key, target_tileables=target_keys)
        final_state = self.wait_for_termination(actor_client, session_ref, key)
        self.assertEqual(final_state, GraphState.SUCCEEDED)
        return loads(session_ref.fetch_result(key, tileable.key))

    # element-wise arithmetic followed by a full reduction
    lhs = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
    rhs = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
    total = (lhs * rhs * 2 + 1).sum()
    fetched = run_to_success(total)
    expected = (np.ones(lhs.shape) * 2 * 1 + 1)**2 * 2 + 1
    assert_allclose(fetched, expected.sum())

    # matrix product: every entry is 50 * 3 * 3 == 450
    lhs = mt.ones((100, 50), chunk_size=35) * 2 + 1
    rhs = mt.ones((50, 200), chunk_size=35) * 2 + 1
    product = lhs.dot(rhs)
    fetched = run_to_success(product)
    assert_allclose(fetched, np.ones((100, 200)) * 450)

    # the same slice added to itself ten times
    base_arr = np.random.random((100, 100))
    wrapped = mt.array(base_arr)
    sumv = reduce(operator.add, [wrapped[:10, :10] for _ in range(10)])
    fetched = run_to_success(sumv)
    expected = reduce(operator.add, [base_arr[:10, :10] for _ in range(10)])
    assert_allclose(fetched, expected)

    # reshape flagged with '_reshape_with_shuffle', then reduce along axis 1
    source = mt.ones((31, 27), chunk_size=10)
    reshaped = source.reshape(27, 31)
    reshaped.op.extra_params['_reshape_with_shuffle'] = True
    row_sums = reshaped.sum(axis=1)
    fetched = run_to_success(row_sums)
    assert_allclose(fetched, np.ones((27, 31)).sum(axis=1))

    # indexing with a tensor-valued index
    raw = np.random.RandomState(0).rand(10, 10)
    wrapped = mt.tensor(raw, chunk_size=(5, 4))
    picked = wrapped[wrapped.argmin(axis=1), mt.tensor(np.arange(10))]
    fetched = run_to_success(picked)
    np.testing.assert_array_equal(fetched, raw[raw.argmin(axis=1), np.arange(10)])
def testFromTensorExecution(self):
    """Execute DataFrames built by ``dataframe_from_tensor`` and
    ``dataframe_from_1d_tileables`` and compare them with pandas DataFrames
    built from the same data.
    """
    # default conversion: both axes are expected to be RangeIndex(0, 10)
    tensor = mt.random.rand(10, 10, chunk_size=5)
    df = dataframe_from_tensor(tensor)
    tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
    pdf_expected = pd.DataFrame(tensor_res)
    df_result = self.executor.execute_dataframe(df, concat=True)[0]
    pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
    pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
    pd.testing.assert_frame_equal(df_result, pdf_expected)

    # test converted with specified index_value and columns
    tensor2 = mt.random.rand(2, 2, chunk_size=1)
    df2 = dataframe_from_tensor(tensor2, index=pd.Index(['a', 'b']), columns=pd.Index([3, 4]))
    df_result = self.executor.execute_dataframe(df2, concat=True)[0]
    pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
    pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

    # test converted from 1-d tensor
    tensor3 = mt.array([1, 2, 3])
    df3 = dataframe_from_tensor(tensor3)
    result3 = self.executor.execute_dataframe(df3, concat=True)[0]
    pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
    pd.testing.assert_frame_equal(pdf_expected, result3)

    # test converted from identical chunks
    tensor4 = mt.ones((10, 10), chunk_size=3)
    df4 = dataframe_from_tensor(tensor4)
    result4 = self.executor.execute_dataframe(df4, concat=True)[0]
    pdf_expected = pd.DataFrame(
        self.executor.execute_tensor(tensor4, concat=True)[0])
    pd.testing.assert_frame_equal(pdf_expected, result4)

    # from tensor with given index
    tensor5 = mt.ones((10, 10), chunk_size=3)
    df5 = dataframe_from_tensor(tensor5, index=np.arange(0, 20, 2))
    result5 = self.executor.execute_dataframe(df5, concat=True)[0]
    pdf_expected = pd.DataFrame(self.executor.execute_tensor(
        tensor5, concat=True)[0], index=np.arange(0, 20, 2))
    pd.testing.assert_frame_equal(pdf_expected, result5)

    # from tensor with given index that is a tensor
    # (data uses chunk_size=3 while the index uses chunk_size=4)
    raw7 = np.random.rand(10, 10)
    tensor7 = mt.tensor(raw7, chunk_size=3)
    index_raw7 = np.random.rand(10)
    index7 = mt.tensor(index_raw7, chunk_size=4)
    df7 = dataframe_from_tensor(tensor7, index=index7)
    result7 = self.executor.execute_dataframe(df7, concat=True)[0]
    pdf_expected = pd.DataFrame(raw7, index=index_raw7)
    pd.testing.assert_frame_equal(pdf_expected, result7)

    # from tensor with given index is a md.Index
    raw10 = np.random.rand(10, 10)
    tensor10 = mt.tensor(raw10, chunk_size=3)
    index10 = md.date_range('2020-1-1', periods=10, chunk_size=3)
    df10 = dataframe_from_tensor(tensor10, index=index10)
    result10 = self.executor.execute_dataframe(df10, concat=True)[0]
    pdf_expected = pd.DataFrame(raw10, index=pd.date_range('2020-1-1', periods=10))
    pd.testing.assert_frame_equal(pdf_expected, result10)

    # from tensor with given columns
    tensor6 = mt.ones((10, 10), chunk_size=3)
    df6 = dataframe_from_tensor(tensor6, columns=list('abcdefghij'))
    result6 = self.executor.execute_dataframe(df6, concat=True)[0]
    pdf_expected = pd.DataFrame(self.executor.execute_tensor(
        tensor6, concat=True)[0], columns=list('abcdefghij'))
    pd.testing.assert_frame_equal(pdf_expected, result6)

    # from 1d tensors
    # columns 'a'..'c' are wrapped in tensors; 'd' (a scalar) and 'e' (a pandas
    # date range) are appended afterwards and passed through unwrapped
    raws8 = [('a', np.random.rand(8)),
             ('b', np.random.randint(10, size=8)),
             ('c', [
                 ''.join(np.random.choice(list(printable), size=6))
                 for _ in range(8)
             ])]
    tensors8 = OrderedDict(
        (r[0], mt.tensor(r[1], chunk_size=3)) for r in raws8)
    raws8.append(('d', 1))
    raws8.append(('e', pd.date_range('2020-1-1', periods=8)))
    tensors8['d'] = 1
    tensors8['e'] = raws8[-1][1]
    df8 = dataframe_from_1d_tileables(tensors8, columns=[r[0] for r in raws8])
    result = self.executor.execute_dataframe(df8, concat=True)[0]
    pdf_expected = pd.DataFrame(OrderedDict(raws8))
    pd.testing.assert_frame_equal(result, pdf_expected)

    # from 1d tensors and specify index with a tensor
    index_raw9 = np.random.rand(8)
    index9 = mt.tensor(index_raw9, chunk_size=4)
    df9 = dataframe_from_1d_tileables(tensors8, columns=[r[0] for r in raws8],
                                      index=index9)
    result = self.executor.execute_dataframe(df9, concat=True)[0]
    pdf_expected = pd.DataFrame(OrderedDict(raws8), index=index_raw9)
    pd.testing.assert_frame_equal(result, pdf_expected)

    # from 1d tensors and specify index
    df11 = dataframe_from_1d_tileables(tensors8, columns=[r[0] for r in raws8],
                                       index=md.date_range('2020-1-1', periods=8))
    result = self.executor.execute_dataframe(df11, concat=True)[0]
    pdf_expected = pd.DataFrame(OrderedDict(raws8),
                                index=pd.date_range('2020-1-1', periods=8))
    pd.testing.assert_frame_equal(result, pdf_expected)
def f():
    """Check the default session's id against ``session`` and return the sum
    of a 2x3 tensor of ones as a numpy value.
    """
    active_id = Session.default.session_id
    assert active_id == session.session_id

    total = mt.ones((2, 3)).sum()
    return total.to_numpy()
def testReExecuteSame(self): data = np.random.random((5, 9)) # test run the same tensor arr4 = mt.tensor(data.copy(), chunk_size=3) + 1 result1 = arr4.to_numpy() expected = data + 1 np.testing.assert_array_equal(result1, expected) result2 = arr4.to_numpy() np.testing.assert_array_equal(result1, result2) # test run the same tensor with single chunk arr4 = mt.tensor(data.copy()) result1 = arr4.to_numpy() expected = data np.testing.assert_array_equal(result1, expected) result2 = arr4.to_numpy() np.testing.assert_array_equal(result1, result2) # modify result sess = Session.default_or_local() executor = sess._sess._executor executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2 result3 = arr4.to_numpy() np.testing.assert_array_equal(result3, data + 2) # test run same key tensor arr5 = mt.ones((10, 10), chunk_size=3) result1 = arr5.to_numpy() del arr5 arr6 = mt.ones((10, 10), chunk_size=3) result2 = arr6.to_numpy() np.testing.assert_array_equal(result1, result2) # test copy, make sure it will not let the execution cache missed df = md.DataFrame(mt.ones((10, 3), chunk_size=5)) executed = [False] def add_one(x): if executed[0]: # pragma: no cover raise ValueError('executed before') return x + 1 df2 = df.apply(add_one) pd.testing.assert_frame_equal(df2.to_pandas(), pd.DataFrame(np.ones((10, 3)) + 1)) executed[0] = True df3 = df2.copy() df4 = df3 * 2 pd.testing.assert_frame_equal(df4.to_pandas(), pd.DataFrame(np.ones((10, 3)) * 4))
def testGraphWithSplit(self):
    """Drive a GraphActor through prepare / scan / place / create-operand
    steps for a split-and-add graph and check the operand info it reports.
    """
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())

    source = mt.ones(12, chunks=4)
    pieces = mt.split(source, 2)
    combined = pieces[0] + pieces[1]

    serialized_graph = serialize_graph(combined.build_graph(compose=False))
    chunked_graph = combined.build_graph(compose=False, tiled=True)

    with create_actor_pool(n_process=1, backend='gevent') as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor,
                                         uid=ResourceActor.default_name())
        kv_store_ref = pool.create_actor(KVStoreActor,
                                         uid=KVStoreActor.default_name())
        pool.create_actor(AssignerActor,
                          uid=AssignerActor.gen_name(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialized_graph,
            uid=GraphActor.gen_name(session_id, graph_key))
        graph_ref.prepare_graph(compose=False)

        # the prepared chunk graph is read back from the KV store
        kv_path = '/sessions/%s/graphs/%s/chunk_graph' % (session_id, graph_key)
        graph_data = kv_store_ref.read(kv_path).value
        self.assertIsNotNone(graph_data)
        fetched_graph = deserialize_graph(graph_data)
        self.assertEqual(len(chunked_graph), len(fetched_graph))

        graph_ref.scan_node()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            optimize_info = op_infos[node.op.key]['optimize']
            for field in ('depth', 'successor_size', 'descendant_size'):
                self.assertIsNotNone(optimize_info[field])

        def write_mock_meta():
            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    endpoint, dict(hardware=dict(cpu_total=4)))

        writer = gevent.spawn(write_mock_meta)
        writer.join()

        graph_ref.place_initial_chunks()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            # only nodes without predecessors are checked for a target worker
            if fetched_graph.count_predecessors(node) == 0:
                self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

        graph_ref.create_operand_actors()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            info = op_infos[node.op.key]
            self.assertEqual(info['op_name'], type(node.op).__name__)

            io_meta = info['io_meta']
            expected_meta = {
                'predecessors': {p.op.key for p in fetched_graph.iter_predecessors(node)},
                'successors': {s.op.key for s in fetched_graph.iter_successors(node)},
                'input_chunks': {p.key for p in fetched_graph.iter_predecessors(node)},
                'chunks': {out.key for out in node.op.outputs},
            }
            for field in ('predecessors', 'successors', 'input_chunks', 'chunks'):
                self.assertSetEqual(set(io_meta[field]), expected_meta[field])

            self.assertEqual(info['output_size'],
                             sum(out.nbytes for out in node.op.outputs))
def testSameKey(self, *_):
    """Drive a GraphActor through prepare / scan / place / create-operand
    steps for a tensor concatenated with itself and check the operand info.
    """
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())

    source = mt.ones((5, 5), chunk_size=3)
    doubled = mt.concatenate((source, source))

    serialized_graph = serialize_graph(doubled.build_graph(compose=False))
    chunked_graph = doubled.build_graph(compose=False, tiled=True)

    addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor,
                                         uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor,
                          uid=AssignerActor.gen_name(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialized_graph,
            uid=GraphActor.gen_name(session_id, graph_key))
        graph_ref.prepare_graph(compose=False)

        fetched_graph = graph_ref.get_chunk_graph()
        self.assertIsNotNone(fetched_graph)
        self.assertEqual(len(chunked_graph), len(fetched_graph))

        graph_ref.scan_node()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            optimize_info = op_infos[node.op.key]['optimize']
            for field in ('depth', 'successor_size', 'descendant_size'):
                self.assertIsNotNone(optimize_info[field])

        for endpoint in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(
                endpoint, dict(hardware=dict(cpu_total=4)))

        graph_ref.place_initial_chunks()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            # only nodes without predecessors are checked for a target worker
            if fetched_graph.count_predecessors(node) == 0:
                self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

        graph_ref.create_operand_actors(_clean_io_meta=False)
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            info = op_infos[node.op.key]
            self.assertEqual(info['op_name'], type(node.op).__name__)

            io_meta = info['io_meta']
            expected_meta = {
                'predecessors': {p.op.key for p in fetched_graph.iter_predecessors(node)},
                'successors': {s.op.key for s in fetched_graph.iter_successors(node)},
                'input_chunks': {p.key for p in fetched_graph.iter_predecessors(node)},
                'chunks': {out.key for out in node.op.outputs},
            }
            for field in ('predecessors', 'successors', 'input_chunks', 'chunks'):
                self.assertSetEqual(set(io_meta[field]), expected_meta[field])

            self.assertEqual(info['output_size'],
                             sum(out.nbytes for out in node.op.outputs))
def testChunkSerialize(self):
    """Round-trip chunks through the pb and json serializers and compare the
    restored chunks with the originals.
    """
    def assert_pb_header(source_chunk, serialized, expected_opcode):
        # fields of the serialized pb form must mirror the chunk itself
        op, pb = serialized[source_chunk.op, source_chunk.data]
        self.assertEqual(tuple(pb.index), source_chunk.index)
        self.assertEqual(pb.key, source_chunk.key)
        self.assertEqual(tuple(pb.shape), source_chunk.shape)
        self.assertEqual(int(op.type.split('.', 1)[1]), expected_opcode)

    def assert_same_layout(source_chunk, restored):
        self.assertEqual(source_chunk.index, restored.index)
        self.assertEqual(source_chunk.key, restored.key)
        self.assertEqual(source_chunk.shape, restored.shape)

    def assert_same_composition(source_chunk, restored):
        self.assertEqual(source_chunk.key, restored.key)
        self.assertEqual(type(source_chunk.op), type(restored.op))
        self.assertEqual(source_chunk.composed[0].key,
                         restored.composed[0].key)
        self.assertEqual(source_chunk.composed[-1].key,
                         restored.composed[-1].key)

    # --- chunk of a ones tensor ---
    tiled = ones((10, 3), chunk_size=(5, 2)).tiles()

    # pb
    chunk = tiled.chunks[0]
    serials = self._pb_serial(chunk)
    assert_pb_header(chunk, serials, opcodes.TENSOR_ONES)
    restored = self._pb_deserial(serials)[chunk.data]
    assert_same_layout(chunk, restored)
    self.assertEqual(chunk.op.dtype, restored.op.dtype)

    # json
    chunk = tiled.chunks[0]
    serials = self._json_serial(chunk)
    restored = self._json_deserial(serials)[chunk.data]
    assert_same_layout(chunk, restored)
    self.assertEqual(chunk.op.dtype, restored.op.dtype)

    # --- chunk of a tensor wrapping numpy data ---
    tiled = tensor(np.random.random((10, 3)), chunk_size=(5, 2)).tiles()

    # pb
    chunk = tiled.chunks[0]
    serials = self._pb_serial(chunk)
    assert_pb_header(chunk, serials, opcodes.TENSOR_DATA_SOURCE)
    restored = self._pb_deserial(serials)[chunk.data]
    assert_same_layout(chunk, restored)
    self.assertTrue(np.array_equal(chunk.op.data, restored.op.data))

    # json
    chunk = tiled.chunks[0]
    serials = self._json_serial(chunk)
    restored = self._json_deserial(serials)[chunk.data]
    assert_same_layout(chunk, restored)
    self.assertTrue(np.array_equal(chunk.op.data, restored.op.data))

    # --- fused chunk built from a data-source chunk and an add chunk ---
    base = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
    shifted = (base + 1).tiles()

    # pb
    head = get_tiled(base).chunks[0]
    tail = shifted.chunks[0]
    fused = build_fuse_chunk([head.data, tail.data], TensorFuseChunk)
    serials = self._pb_serial(fused)
    op, pb = serials[fused.op, fused.data]
    self.assertEqual(pb.key, fused.key)
    self.assertEqual(int(op.type.split('.', 1)[1]), opcodes.FUSE)
    assert_same_composition(fused, self._pb_deserial(serials)[fused.data])

    # json
    head = get_tiled(base).chunks[0]
    tail = shifted.chunks[0]
    fused = build_fuse_chunk([head.data, tail.data], TensorFuseChunk)
    serials = self._json_serial(fused)
    assert_same_composition(fused, self._json_deserial(serials)[fused.data])

    # --- first input of the first chunk of a tiled dot ---
    left = ones((10, 3), chunk_size=2)
    right = ones((3, 5), chunk_size=2)
    dot_input = dot(left, right).tiles().chunks[0].inputs[0]

    round_trips = (
        (self._pb_serial, self._pb_deserial),
        (self._json_serial, self._json_deserial),
    )
    for to_serial, from_serial in round_trips:
        serials = to_serial(dot_input)
        restored = from_serial(serials)[dot_input]
        self.assertEqual(dot_input.key, restored.key)
def testFromTensor(self):
    """Check metadata and tiling of DataFrames and Series built from tensors."""
    matrix = mt.random.rand(10, 10, chunk_size=5)
    frame = dataframe_from_tensor(matrix)
    self.assertIsInstance(frame.index_value._index_value, IndexValue.RangeIndex)
    self.assertEqual(frame.op.dtypes[0], matrix.dtype,
                     'DataFrame converted from tensor have the wrong dtype')

    frame.tiles()
    self.assertEqual(len(frame.chunks), 4)
    first_chunk_index = frame.chunks[0].index_value
    self.assertIsInstance(first_chunk_index._index_value, IndexValue.RangeIndex)
    self.assertIsInstance(frame.chunks[0].index_value, IndexValue)

    # conversion from a 1-d tensor and from its (3, 1) transposed wrapping
    flat = mt.array([1, 2, 3])
    column = mt.array([flat]).T
    from_flat = dataframe_from_tensor(flat)
    from_column = dataframe_from_tensor(column)
    for converted in (from_flat, from_column):
        converted.tiles()
    for converted in (from_flat, from_column):
        np.testing.assert_equal(converted.chunks[0].index, (0, 0))

    # a 0-d tensor is rejected
    scalar = mt.array(1)
    np.testing.assert_equal(scalar.ndim, 0)
    with self.assertRaises(TypeError):
        dataframe_from_tensor(scalar)

    # explicit index: chunks 0/1 share the first half, chunks 2/3 the second
    frame = dataframe_from_tensor(matrix, index=np.arange(0, 20, 2))
    frame.tiles()
    index_bounds = [(0, 10), (0, 10), (10, 20), (10, 20)]
    for position, (start, stop) in enumerate(index_bounds):
        pd.testing.assert_index_equal(
            frame.chunks[position].index_value.to_pandas(),
            pd.Index(np.arange(start, stop, 2)))

    # explicit columns: even chunks get 'a'..'e', odd chunks get 'f'..'j'
    frame = dataframe_from_tensor(matrix, columns=list('abcdefghij'))
    frame.tiles()
    column_labels = [
        ['a', 'b', 'c', 'd', 'e'],
        ['f', 'g', 'h', 'i', 'j'],
        ['a', 'b', 'c', 'd', 'e'],
        ['f', 'g', 'h', 'i', 'j'],
    ]
    for position, labels in enumerate(column_labels):
        pd.testing.assert_index_equal(
            frame.chunks[position].columns.to_pandas(), pd.Index(labels))

    # series from a 1-d tensor
    vector = mt.random.rand(10, chunk_size=4)
    series = series_from_tensor(vector, name='a')
    self.assertEqual(series.dtype, vector.dtype)
    self.assertEqual(series.name, 'a')
    pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(10))

    series.tiles()
    self.assertEqual(len(series.chunks), 3)
    series_bounds = [(0, 4), (4, 8), (8, 10)]
    for position, (start, stop) in enumerate(series_bounds):
        piece = series.chunks[position]
        pd.testing.assert_index_equal(piece.index_value.to_pandas(),
                                      pd.RangeIndex(start, stop))
        self.assertEqual(series.chunks[position].name, 'a')

    # a 2-d tensor cannot become a series
    with self.assertRaises(TypeError):
        series_from_tensor(mt.ones((10, 10)))
def testExecutorWithGeventProvider(self):
    """Execute a tensor of ones on an Executor created with the GEVENT sync
    provider type and compare the result with numpy.
    """
    provider = Executor.SyncProviderType.GEVENT
    gevent_executor = Executor(sync_provider_type=provider)

    ones_tensor = mt.ones((10, 10), chunk_size=2)
    outputs = gevent_executor.execute_tensor(ones_tensor, concat=True)

    np.testing.assert_array_equal(outputs[0], np.ones((10, 10)))
def testMain(self):
    """Submit tensor graphs to a SessionActor and compare fetched results
    with the equivalent numpy computation.
    """
    session_id = uuid.uuid1()
    scheduler_address = '127.0.0.1:' + self.scheduler_port
    actor_client = new_client()
    session_ref = actor_client.create_actor(
        SessionActor, uid=SessionActor.gen_name(session_id),
        address=scheduler_address, session_id=session_id)

    def submit_and_wait(tensor_graph, target_keys):
        """Submit under a fresh key; return ``(key, final_state)``."""
        key = uuid.uuid1()
        session_ref.submit_tensor_graph(
            json.dumps(tensor_graph.to_json()), key, target_tensors=target_keys)
        return key, self.wait_for_termination(session_ref, key)

    # element-wise arithmetic followed by a full reduction
    lhs = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
    rhs = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
    total = (lhs * rhs * 2 + 1).sum()
    graph = total.build_graph()
    targets = [total.key]
    graph_key, state = submit_and_wait(graph, targets)
    self.assertEqual(state, GraphState.SUCCEEDED)

    fetched = session_ref.fetch_result(graph_key, total.key)
    expected = (np.ones(lhs.shape) * 2 * 1 + 1) ** 2 * 2 + 1
    assert_array_equal(loads(fetched), expected.sum())

    # submitting the very same graph again under a new key must end in FAILED
    # todo this behavior may change when eager mode is introduced
    _, state = submit_and_wait(graph, targets)
    self.assertEqual(state, GraphState.FAILED)

    # matrix product: every entry is 50 * 3 * 3 == 450
    lhs = mt.ones((100, 50), chunk_size=35) * 2 + 1
    rhs = mt.ones((50, 200), chunk_size=35) * 2 + 1
    product = lhs.dot(rhs)
    graph_key, state = submit_and_wait(product.build_graph(), [product.key])
    self.assertEqual(state, GraphState.SUCCEEDED)
    fetched = session_ref.fetch_result(graph_key, product.key)
    assert_array_equal(loads(fetched), np.ones((100, 200)) * 450)

    # the same slice added to itself ten times
    base_arr = np.random.random((100, 100))
    wrapped = mt.array(base_arr)
    sumv = reduce(operator.add, [wrapped[:10, :10] for _ in range(10)])
    graph_key, state = submit_and_wait(sumv.build_graph(), [sumv.key])
    self.assertEqual(state, GraphState.SUCCEEDED)

    expected = reduce(operator.add, [base_arr[:10, :10] for _ in range(10)])
    fetched = session_ref.fetch_result(graph_key, sumv.key)
    assert_array_equal(loads(fetched), expected)
def testTensordot(self):
    """Check result shapes of ``tensordot``, ``dot`` and ``inner`` before and
    after tiling.
    """
    from mars.tensor.linalg import tensordot, dot, inner

    def assert_shape_survives_tiling(result, expected_shape):
        # shape before tiling, then shape against the sums of nsplits after
        self.assertEqual(result.shape, expected_shape)
        result.tiles()
        self.assertEqual(result.shape,
                         tuple(sum(split) for split in result.nsplits))

    left = ones((3, 4, 6), chunk_size=2)
    right = ones((4, 3, 5), chunk_size=2)
    contracted = tensordot(left, right, axes=((0, 1), (1, 0)))
    self.assertEqual(contracted.shape, (6, 5))
    contracted.tiles()
    self.assertEqual(contracted.shape, (6, 5))
    self.assertEqual(len(contracted.chunks), 9)

    # with default axes these operands are rejected
    left = ones((10000, 20000), chunk_size=5000)
    right = ones((20000, 1000), chunk_size=5000)
    with self.assertRaises(ValueError):
        tensordot(left, right)

    # vector . matrix
    left = ones(10, chunk_size=2)
    right = ones((10, 20), chunk_size=2)
    assert_shape_survives_tiling(dot(left, right), (20, ))

    # matrix . vector
    left = ones((10, 20), chunk_size=2)
    right = ones(20, chunk_size=2)
    assert_shape_survives_tiling(dot(left, right), (10, ))

    # matrix . matrix through the method form
    square = ones((100, 100), chunk_size=10)
    assert_shape_survives_tiling(square.dot(square), (100, 100))

    # inner product
    left = ones((10, 20), chunk_size=2)
    right = ones((30, 20), chunk_size=2)
    assert_shape_survives_tiling(inner(left, right), (10, 30))
def testPrepareQuota(self, *_):
    """Check that ``enqueue_graph`` does not resolve before chunk pinning
    stops failing, which happens about one second after submission.
    """
    # one-element list so the nested functions can read and flip the flag
    pinned = [True]

    def _mock_pin(graph_key, chunk_keys):
        # raises PinChunkFailed for as long as the flag is set
        from mars.errors import PinChunkFailed
        if pinned[0]:
            raise PinChunkFailed
        return chunk_keys

    # presumably ``pin_chunks`` is already replaced by a mock (e.g. via a patch
    # decorator outside this view) -- TODO confirm
    ChunkHolderActor.pin_chunks.side_effect = _mock_pin

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
        chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())

        import mars.tensor as mt
        from mars.tensor.expressions.datasource import TensorFetch
        arr = mt.ones((4, ), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        # swap the data-source op of ``arr_add``'s chunk for a TensorFetch
        # that keeps the same key and outputs
        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)
        chunk_meta_ref.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                      shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(
                ExecutionActor.default_name())

            start_time = time.time()

            # on success the result is the wall-clock time of resolution
            execution_ref.enqueue_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                .then(lambda *_: test_actor.set_result(time.time())) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

            def _delay_fun():
                # let pinning succeed after one second
                time.sleep(1)
                pinned[0] = False

            threading.Thread(target=_delay_fun).start()

        finish_time = self.get_result()
        self.assertGreaterEqual(finish_time, start_time + 1)
def testSimpleExecution(self):
    """Execute a two-input add graph through ExecutionActor and validate the
    stored result, first via ``execute_graph`` and then via
    ``add_finish_callback`` on the same graph key.
    """
    pool_address = f'127.0.0.1:{get_next_port()}'
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)
        pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
        pool.create_actor(InProcHolderActor)

        import mars.tensor as mt
        from mars.tensor.datasource import TensorOnes
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((10, 8), chunk_size=10)
        arr_add = mt.ones((10, 8), chunk_size=10)
        arr2 = arr + arr_add
        graph = arr2.build_graph(fuse_enabled=False, tiled=True)

        arr = get_tiled(arr)
        arr2 = get_tiled(arr2)

        # replace every TensorOnes op in the graph with a TensorFetch of the
        # same key and record a WorkerMeta pointing at this pool for its chunk
        metas = dict()
        for chunk in graph:
            if isinstance(chunk.op, TensorOnes):
                chunk._op = TensorFetch(
                    dtype=chunk.dtype, _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                    _key=chunk.op.key)
                metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape, pool_address)

        with self.run_actor_test(pool) as test_actor:
            session_id = str(uuid.uuid4())

            # store the input data under arr's chunk key before executing
            storage_client = test_actor.storage_client
            self.waitp(
                storage_client.put_objects(
                    session_id, [arr.chunks[0].key], [np.ones((10, 8), dtype=np.int16)],
                    [DataStorageDevice.SHARED_MEMORY]),
            )

            execution_ref = test_actor.promise_ref(
                ExecutionActor.default_uid())

            def _validate(*_):
                data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                assert_array_equal(data, 2 * np.ones((10, 8)))

            graph_key = str(uuid.uuid4())
            execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[arr2.chunks[0].key]), metas, _promise=True) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()

        # second pass reuses session_id and graph_key from the first pass
        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(
                ExecutionActor.default_uid())

            def _validate(*_):
                data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                assert_array_equal(data, 2 * np.ones((10, 8)))

            execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()
def testPrepareSpilled(self):
    """Check that a graph input can be loaded from a spill file.

    The graph is first run while no chunk meta exists for the fetched
    input, which is expected to surface ``DependencyMissing``. The meta is
    then registered, the data is written with ``write_spill_file`` and the
    graph is run again; the stored result must equal ``mock_data + 1``.
    """
    from mars.worker.spill import write_spill_file

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    # ``tempfile.mkdtemp`` takes ``suffix`` as its first positional
    # parameter, so the label has to be passed as ``prefix`` explicitly to
    # end up at the front of the directory name.
    options.worker.spill_directory = tempfile.mkdtemp(
        prefix='mars_worker_prep_spilled-')

    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(SpillActor)
        pool.create_actor(CpuCalcActor)
        chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())
        # NOTE(review): the returned reference is not used; kept as in the
        # original in case the lookup is intentional.
        pool.actor_ref(ChunkHolderActor.default_name())

        import mars.tensor as mt
        from mars.tensor.expressions.datasource import TensorFetch
        arr = mt.ones((4, ), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        # replace the array data source with a fetch op sharing the same key
        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)

        # test meta missing
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
            execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        with self.assertRaises(DependencyMissing):
            self.get_result()

        chunk_meta_ref.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                      shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
        write_spill_file(modified_chunk.key, mock_data)

        # test read from spilled file
        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
            execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()
def testPrepareQuota(self, *_):
    """Check that execution is held back while input data cannot be pinned.

    ``SharedHolderActor.pin_data_keys`` is patched to raise
    ``PinDataKeyFailed`` for as long as ``pinned`` is true. A background
    thread clears the flag after 0.5 seconds, and the test asserts that the
    graph does not finish earlier than that.
    """
    pinned = True
    orig_pin = SharedHolderActor.pin_data_keys

    def _mock_pin(self, session_id, chunk_keys, token):
        from mars.errors import PinDataKeyFailed
        if not pinned:
            return orig_pin(self, session_id, chunk_keys, token)
        raise PinDataKeyFailed

    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
            create_actor_pool(n_process=1, backend='gevent',
                              address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)
        pool.actor_ref(WorkerClusterInfoActor.default_uid())

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch

        base = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = base + addend
        graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

        addend = get_tiled(addend)
        result_tensor = get_tiled(result_tensor)

        # make the array input a fetch op so it has to be loaded remotely
        fetch_chunk = addend.chunks[0]
        addend.chunks[0]._op = TensorFetch(
            dtype=fetch_chunk.dtype,
            _outputs=[weakref.ref(out) for out in fetch_chunk.op.outputs],
            _key=fetch_chunk.op.key)

        remote_workers = ('0.0.0.0:1234',
                          pool_address.replace('127.0.0.1', 'localhost'))
        metas = {
            fetch_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape,
                                        remote_workers),
        }

        def _record_finish(*_):
            return test_actor.set_result(time.time())

        def _fail(*exc):
            return test_actor.set_result(exc, False)

        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            start_time = time.time()

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_record_finish).catch(_fail)

            def _delay_fun():
                # allow pinning to succeed once half a second has passed
                nonlocal pinned
                time.sleep(0.5)
                pinned = False

            threading.Thread(target=_delay_fun).start()

        finish_time = self.get_result()
        self.assertGreaterEqual(finish_time, start_time + 0.5)
def testFromTensor(self):
    """Check metadata of DataFrames and Series constructed from tensors.

    Covers index and columns values before and after tiling, explicitly
    supplied index and columns, construction from 1-d tensors, and the
    errors raised for scalar input or mismatching shapes.
    """
    tensor = mt.random.rand(10, 10, chunk_size=5)
    df = dataframe_from_tensor(tensor)
    self.assertIsInstance(df.index_value._index_value, IndexValue.RangeIndex)
    self.assertEqual(df.op.dtypes[0], tensor.dtype,
                     'DataFrame converted from tensor have the wrong dtype')

    df = df.tiles()
    self.assertEqual(len(df.chunks), 4)
    self.assertIsInstance(df.chunks[0].index_value._index_value,
                          IndexValue.RangeIndex)
    self.assertIsInstance(df.chunks[0].index_value, IndexValue)

    # a 1-d tensor and its (3, 1) counterpart both start at chunk (0, 0)
    tensor2 = mt.array([1, 2, 3])
    tensor3 = mt.array([tensor2]).T

    df2 = dataframe_from_tensor(tensor2)
    df3 = dataframe_from_tensor(tensor3)
    df2 = df2.tiles()
    df3 = df3.tiles()
    for tiled in (df2, df3):
        np.testing.assert_equal(tiled.chunks[0].index, (0, 0))

    # a scalar tensor cannot be converted
    scalar = mt.array(1)
    np.testing.assert_equal(scalar.ndim, 0)
    with self.assertRaises(TypeError):
        dataframe_from_tensor(scalar)

    # explicit index given as a numpy array
    df = dataframe_from_tensor(tensor, index=np.arange(0, 20, 2))
    df = df.tiles()
    index_bounds = [(0, 10), (0, 10), (10, 20), (10, 20)]
    for pos, (start, stop) in enumerate(index_bounds):
        pd.testing.assert_index_equal(
            df.chunks[pos].index_value.to_pandas(),
            pd.Index(np.arange(start, stop, 2)))

    # explicit index given as a tensor
    df = dataframe_from_tensor(tensor, index=mt.arange(0, 20, 2))
    df = df.tiles()
    self.assertEqual(len(df.chunks[0].inputs), 2)
    self.assertFalse(df.chunks[0].index_value.has_value())

    # explicit columns
    df = dataframe_from_tensor(tensor, columns=list('abcdefghij'))
    df = df.tiles()
    pd.testing.assert_index_equal(df.dtypes.index, pd.Index(list('abcdefghij')))
    first_half = ['a', 'b', 'c', 'd', 'e']
    second_half = ['f', 'g', 'h', 'i', 'j']
    for pos, labels in enumerate([first_half, second_half,
                                  first_half, second_half]):
        pd.testing.assert_index_equal(
            df.chunks[pos].columns_value.to_pandas(), pd.Index(labels))
        pd.testing.assert_index_equal(
            df.chunks[pos].dtypes.index, pd.Index(labels))

    # series from tensor
    tensor = mt.random.rand(10, chunk_size=4)
    series = series_from_tensor(tensor, name='a')
    self.assertEqual(series.dtype, tensor.dtype)
    self.assertEqual(series.name, 'a')
    pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                  pd.RangeIndex(10))

    series = series.tiles()
    self.assertEqual(len(series.chunks), 3)
    for pos, (start, stop) in enumerate([(0, 4), (4, 8), (8, 10)]):
        pd.testing.assert_index_equal(
            series.chunks[pos].index_value.to_pandas(),
            pd.RangeIndex(start, stop))
        self.assertEqual(series.chunks[pos].name, 'a')

    df = dataframe_from_1d_tensors(
        [mt.tensor(np.random.rand(4)), mt.tensor(np.random.rand(4))])
    pd.testing.assert_index_equal(df.columns_value.to_pandas(),
                                  pd.RangeIndex(2))

    df = df.tiles()
    pd.testing.assert_index_equal(df.chunks[0].index_value.to_pandas(),
                                  pd.RangeIndex(4))

    series = series_from_tensor(mt.random.rand(4))
    pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                  pd.RangeIndex(4))

    series = series_from_tensor(mt.random.rand(4), index=[1, 2, 3])
    pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                  pd.Index([1, 2, 3]))

    series = series_from_tensor(
        mt.random.rand(4), index=pd.Index([1, 2, 3], name='my_index'))
    pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                  pd.Index([1, 2, 3], name='my_index'))
    self.assertEqual(series.index_value.name, 'my_index')

    # a 2-d tensor cannot become a series
    with self.assertRaises(TypeError):
        series_from_tensor(mt.ones((10, 10)))

    # index length differs from the number of rows
    with self.assertRaises(ValueError):
        dataframe_from_tensor(mt.random.rand(4, 3), index=mt.random.rand(5))

    # columns length differs from the number of columns
    with self.assertRaises(ValueError):
        dataframe_from_tensor(mt.random.rand(4, 3), columns=['a', 'b'])

    # index has to be 1-d
    with self.assertRaises(ValueError):
        dataframe_from_tensor(mt.tensor(np.random.rand(3, 2)),
                              index=mt.tensor(np.random.rand(3, 2)))

    # 1-d tensors have to share the same shape
    with self.assertRaises(ValueError):
        dataframe_from_1d_tensors(
            [mt.tensor(np.random.rand(3)), mt.tensor(np.random.rand(2))])

    # index length differs from the tensor length
    with self.assertRaises(ValueError):
        dataframe_from_1d_tensors([mt.tensor(np.random.rand(3))],
                                  index=mt.tensor(np.random.rand(2)))

    # columns length differs from the number of tensors
    with self.assertRaises(ValueError):
        dataframe_from_1d_tensors([mt.tensor(np.random.rand(3))],
                                  columns=['a', 'b'])

    # series index has to be 1-d
    with self.assertRaises(ValueError):
        series_from_tensor(mt.random.rand(4), index=mt.random.rand(4, 3))
def testPrepareSpilled(self):
    """Check that a graph input can be loaded after it was moved to disk.

    The graph is first executed without metas, which is expected to surface
    ``DependencyMissing``. The input is then put into process memory, copied
    to disk and deleted from process memory before the graph is executed
    again with metas; the stored result must equal ``mock_data + 1``.
    """
    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    options.worker.spill_directory = tempfile.mkdtemp(
        prefix='mars_worker_prep_spilled-')

    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(IORunnerActor)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch

        base = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = base + addend
        graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

        addend = get_tiled(addend)
        result_tensor = get_tiled(result_tensor)

        # make the array input a fetch op that keeps the same key
        fetch_chunk = addend.chunks[0]
        addend.chunks[0]._op = TensorFetch(
            dtype=fetch_chunk.dtype,
            _outputs=[weakref.ref(out) for out in fetch_chunk.op.outputs],
            _key=fetch_chunk.op.key)

        def _succeed(*_):
            return test_actor.set_result(None)

        def _fail(*exc):
            return test_actor.set_result(exc, False)

        # first run: no metas supplied
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True,
            ).then(_succeed).catch(_fail)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        metas = {
            fetch_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape,
                                        ('0.0.0.0:1234', pool_address)),
        }

        # second run: the input only exists on disk
        with self.run_actor_test(pool) as test_actor:
            spill_promise = test_actor.storage_client.put_objects(
                session_id, [fetch_chunk.key], [mock_data],
                [DataStorageDevice.PROC_MEMORY],
            ).then(lambda *_: test_actor.storage_client.copy_to(
                session_id, [fetch_chunk.key], [DataStorageDevice.DISK]))
            self.waitp(spill_promise)
            test_actor.storage_client.delete(
                session_id, [fetch_chunk.key], [DataStorageDevice.PROC_MEMORY])

            def _validate(*_):
                stored = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True,
            ).then(_validate).then(_succeed).catch(_fail)

        self.get_result()
def testFetch(self):
    """Check that executed results are reused and can be fetched.

    A chunk result is overwritten directly in the executor to show that
    later runs read it instead of recomputing. The test then fetches
    several tensors at once, and finally registers a failing runner for
    ``ArrayDataSource`` to show that ``fetch`` does not execute again.
    """
    sess = new_session()

    arr1 = mt.ones((10, 5), chunk_size=3)

    first_run = sess.run(arr1)
    second_run = sess.run(arr1)
    np.testing.assert_array_equal(first_run, second_run)

    # overwrite the first chunk's stored result with twos
    executor = sess._sess._executor
    first_chunk_key = get_tiled(arr1).chunks[0].key
    executor.chunk_result[first_chunk_key] = np.ones((3, 3)) * 2
    plus_one = sess.run(arr1 + 1)
    np.testing.assert_array_equal(plus_one[:3, :3], np.ones((3, 3)) * 3)

    # rerun to ensure arr1's chunk results still exist
    plus_one_again = sess.run(arr1 + 1)
    np.testing.assert_array_equal(plus_one_again[:3, :3], np.ones((3, 3)) * 3)

    arr2 = mt.ones((10, 5), chunk_size=3)
    same_ones = sess.run(arr2)
    np.testing.assert_array_equal(same_ones[:3, :3], np.ones((3, 3)) * 2)

    same_ones_plus_one = sess.run(arr2 + 1)
    np.testing.assert_array_equal(same_ones_plus_one[:3, :3],
                                  np.ones((3, 3)) * 3)

    df = md.DataFrame(np.random.rand(10, 2), columns=list('ab'))
    s = df['a'].map(lambda x: np.ones((3, 3)), dtype='object').sum()

    np.testing.assert_array_equal(s.execute().fetch(), np.ones((3, 3)) * 10)

    # test fetch multiple tensors
    raw = np.random.rand(5, 10)
    arr1 = mt.ones((5, 10), chunk_size=5)
    arr2 = mt.tensor(raw, chunk_size=3)
    arr3 = mt.sum(arr2)

    sess.run(arr1, arr2, arr3)

    def _check_fetched(fetched):
        fetch1, fetch2, fetch3 = fetched
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

    # tensors may be passed either separately or as one list
    _check_fetched(sess.fetch(arr1, arr2, arr3))
    _check_fetched(sess.fetch([arr1, arr2, arr3]))

    raw = np.random.rand(5, 10)
    arr = mt.tensor(raw, chunk_size=5)
    s = arr.sum()

    self.assertAlmostEqual(s.execute().fetch(), raw.sum())

    def _execute_ds(*_):  # pragma: no cover
        raise ValueError('cannot run random again')

    try:
        register(ArrayDataSource, _execute_ds)
        self.assertAlmostEqual(s.fetch(), raw.sum())
    finally:
        del Executor._op_runners[ArrayDataSource]
def testFetchRemoteData(self):
    """Check how graph execution reacts to missing and usable input metas.

    The same graph is submitted three times: without metas, with a meta
    that only lists ``0.0.0.0:1234``, and with a meta that also lists the
    local pool under ``localhost``. The first two runs are expected to
    surface ``DependencyMissing``; the third must store ``mock_data + 1``.
    """
    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False,
                                    with_status=False, with_resource=True)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)
        pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch

        base = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = base + addend
        graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

        addend = get_tiled(addend)
        result_tensor = get_tiled(result_tensor)

        # make the array input a fetch op that keeps the same key
        fetch_chunk = addend.chunks[0]
        addend.chunks[0]._op = TensorFetch(
            dtype=fetch_chunk.dtype,
            _outputs=[weakref.ref(out) for out in fetch_chunk.op.outputs],
            _key=fetch_chunk.op.key)

        def _succeed(*_):
            return test_actor.set_result(None)

        def _fail(*exc):
            return test_actor.set_result(exc, False)

        # run 1: no metas at all
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)
            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_succeed).catch(_fail)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        metas = {
            fetch_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape,
                                        ('0.0.0.0:1234', )),
        }

        # run 2: the meta only lists 0.0.0.0:1234
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)
            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_succeed).catch(_fail)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        metas[fetch_chunk.key] = WorkerMeta(
            mock_data.nbytes, mock_data.shape,
            ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))

        # run 3: the meta also lists the local pool
        with self.run_actor_test(pool) as test_actor:
            def _validate(*_):
                stored = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)
            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_validate).then(_succeed).catch(_fail)

        self.get_result()
def testWebApi(self):
    """Run tensors through a web session and probe the web UI endpoints.

    Covers a plain run, resubmission of the same tensor, fetching while the
    local LZ4 handlers are set to ``None``, and a list of UI pages that
    must answer with HTTP 200. After the session is closed, the worker's
    storage manager must report no keys.
    """
    service_ep = 'http://127.0.0.1:' + self.web_port
    timeout = 120 if 'CI' in os.environ else -1

    def _assert_ok(url):
        res = requests.get(url)
        self.assertEqual(res.status_code, 200)

    with new_session(service_ep) as sess:
        self.assertEqual(sess.count_workers(), 1)

        lhs = mt.ones((100, 100), chunk_size=30)
        rhs = mt.ones((100, 100), chunk_size=30)
        prod = lhs.dot(rhs)
        value = sess.run(prod, timeout=timeout)
        assert_array_equal(value, np.ones((100, 100)) * 100)

        # check resubmission
        value2 = sess.run(prod, timeout=timeout)
        assert_array_equal(value, value2)

        # check when local compression libs are missing
        from mars.serialize import dataserializer
        try:
            lhs = mt.ones((10, 10), chunk_size=30)
            rhs = mt.ones((10, 10), chunk_size=30)
            prod = lhs.dot(rhs)
            value = sess.run(prod, timeout=timeout)
            assert_array_equal(value, np.ones((10, 10)) * 10)

            for registry in (dataserializer.decompressors,
                             dataserializer.decompressobjs,
                             dataserializer.compress_openers):
                registry[dataserializer.CompressType.LZ4] = None

            assert_array_equal(sess.fetch(prod), np.ones((10, 10)) * 10)
        finally:
            # put the LZ4 handlers back in place
            lz4 = dataserializer.CompressType.LZ4
            dataserializer.decompressors[lz4] = dataserializer.lz4_decompress
            dataserializer.decompressobjs[lz4] = dataserializer.lz4_decompressobj
            dataserializer.compress_openers[lz4] = dataserializer.lz4_open

        va = np.random.randint(0, 10000, (100, 100))
        vb = np.random.randint(0, 10000, (100, 100))
        lhs = mt.array(va, chunk_size=30)
        rhs = mt.array(vb, chunk_size=30)
        prod = lhs.dot(rhs)
        value = sess.run(prod, timeout=timeout)
        assert_array_equal(value, va.dot(vb))

        graphs = sess.get_graph_states()

        # make sure status got uploaded
        time.sleep(1.5)

        # check web UI requests
        _assert_ok(service_ep)

        _assert_ok('%s/scheduler' % (service_ep, ))
        _assert_ok('%s/scheduler/127.0.0.1:%s' % (service_ep, self.scheduler_port))

        _assert_ok('%s/worker' % (service_ep, ))
        _assert_ok('%s/worker/127.0.0.1:%s' % (service_ep, self.worker_port))
        _assert_ok('%s/worker/127.0.0.1:%s/timeline' % (service_ep, self.worker_port))

        _assert_ok('%s/session' % (service_ep, ))
        task_id = next(iter(graphs.keys()))
        _assert_ok('%s/session/%s/graph/%s' % (service_ep, sess._session_id, task_id))
        _assert_ok('%s/session/%s/graph/%s/running_nodes'
                   % (service_ep, sess._session_id, task_id))

        from mars.web.task_pages import PROGRESS_APP_NAME
        _assert_ok('%s/%s?session_id=%s&task_id=%s'
                   % (service_ep, PROGRESS_APP_NAME, sess._session_id, task_id))

        from mars.web.worker_pages import TIMELINE_APP_NAME
        _assert_ok('%s/%s?endpoint=127.0.0.1:%s'
                   % (service_ep, TIMELINE_APP_NAME, self.worker_port))

    # make sure all chunks freed when session quits
    from mars.worker.storage import StorageManagerActor
    actor_client = new_client()
    storage_manager_ref = actor_client.actor_ref(
        StorageManagerActor.default_uid(),
        address='127.0.0.1:' + str(self.worker_port))
    self.assertFalse(bool(storage_manager_ref.dump_keys()))
def testEagerMode(self, *_):
    """Exercise eager mode on a local cluster session and on web sessions.

    With ``eager_mode`` enabled, tensors and DataFrames are compared with
    their expected values without an explicit ``execute`` call, and the web
    sessions are checked for the number of submitted tasks. Outside the
    option context, ``fetch`` on an unexecuted tensor must raise
    ``ValueError``.
    """
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:

        self.assertIsInstance(Session.default_or_local()._sess,
                              LocalClusterSession)

        with option_context({'eager_mode': True}):
            raw = np.random.rand(10, 10)

            t = mt.tensor(raw, chunk_size=3)
            np.testing.assert_array_equal(t, raw)

            plus_one = t + 1
            want_plus_one = raw + 1
            np.testing.assert_array_equal(plus_one, want_plus_one)

            squared = plus_one.dot(plus_one)
            want_squared = want_plus_one.dot(want_plus_one)
            np.testing.assert_array_almost_equal(squared, want_squared)

        # without eager mode nothing has been executed yet
        t = mt.ones((10, 10), chunk_size=3)
        with self.assertRaises(ValueError):
            t.fetch()

        prod = t.dot(t)
        np.testing.assert_array_equal(prod.execute(), np.ones((10, 10)) * 10)

        with new_session('http://' + cluster._web_endpoint).as_default():
            self.assertIsInstance(Session.default_or_local()._sess, WebSession)

            with option_context({'eager_mode': True}):
                raw = np.random.rand(10, 10)

                t = mt.tensor(raw, chunk_size=3)
                np.testing.assert_array_equal(t, raw)

                plus_one = t + 1
                want_plus_one = raw + 1
                np.testing.assert_array_equal(plus_one, want_plus_one)

                squared = plus_one.dot(plus_one)
                want_squared = want_plus_one.dot(want_plus_one)
                np.testing.assert_array_almost_equal(squared, want_squared)

                web_session = Session.default_or_local()._sess
                self.assertEqual(web_session.get_task_count(), 3)

            t = mt.ones((10, 10), chunk_size=3)
            with self.assertRaises(ValueError):
                t.fetch()

            prod = t.dot(t)
            np.testing.assert_array_equal(prod.execute(),
                                          np.ones((10, 10)) * 10)

        with new_session('http://' + cluster._web_endpoint).as_default():
            from mars.dataframe.datasource.dataframe import from_pandas as from_pandas_df
            from mars.dataframe.datasource.series import from_pandas as from_pandas_series
            from mars.dataframe.arithmetic import add

            self.assertIsInstance(Session.default_or_local()._sess, WebSession)

            with option_context({'eager_mode': True}):
                data1 = pd.DataFrame(
                    np.random.rand(10, 10),
                    index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9],
                    columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7])
                df1 = from_pandas_df(data1, chunk_size=5)
                pd.testing.assert_frame_equal(df1.fetch(), data1)

                data2 = pd.DataFrame(
                    np.random.rand(10, 10),
                    index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3],
                    columns=[5, 9, 12, 3, 11, 10, 6, 4, 1, 2])
                df2 = from_pandas_df(data2, chunk_size=6)
                pd.testing.assert_frame_equal(df2.fetch(), data2)

                df3 = add(df1, df2)
                pd.testing.assert_frame_equal(df3.fetch(), data1 + data2)

                s1 = pd.Series(np.random.rand(10),
                               index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
                series1 = from_pandas_series(s1)
                pd.testing.assert_series_equal(series1.fetch(), s1)

                web_session = Session.default_or_local()._sess
                self.assertEqual(web_session.get_task_count(), 4)
def testWebApi(self):
    """Run workloads through a web session and probe the web UI endpoints.

    Covers a plain run, resubmission, fetching while the local LZ4 handlers
    are set to ``None``, results transferred with pickle serialization,
    log fetching of a spawned function, and a list of UI pages that must
    answer with HTTP 200. After the session is closed, the worker's storage
    manager must report an empty key set.
    """
    service_ep = 'http://127.0.0.1:' + self.web_port
    timeout = 120 if 'CI' in os.environ else -1

    def _assert_ok(url):
        res = requests.get(url)
        self.assertEqual(res.status_code, 200)

    with new_session(service_ep) as sess:
        session_id = sess._session_id
        self.assertEqual(sess.count_workers(), 1)

        lhs = mt.ones((100, 100), chunk_size=30)
        rhs = mt.ones((100, 100), chunk_size=30)
        prod = lhs.dot(rhs)
        value = sess.run(prod, timeout=timeout)
        np.testing.assert_array_equal(value, np.ones((100, 100)) * 100)

        # check resubmission
        value2 = sess.run(prod, timeout=timeout)
        np.testing.assert_array_equal(value, value2)

        # check when local compression libs are missing
        from mars.serialize import dataserializer
        try:
            lhs = mt.ones((10, 10), chunk_size=30)
            rhs = mt.ones((10, 10), chunk_size=30)
            prod = lhs.dot(rhs)
            value = sess.run(prod, timeout=timeout)
            np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

            for registry in (dataserializer.decompressors,
                             dataserializer.decompressobjs,
                             dataserializer.compress_openers):
                registry[dataserializer.CompressType.LZ4] = None

            np.testing.assert_array_equal(sess.fetch(prod),
                                          np.ones((10, 10)) * 10)
        finally:
            # put the LZ4 handlers back in place
            lz4 = dataserializer.CompressType.LZ4
            dataserializer.decompressors[lz4] = dataserializer.lz4_decompress
            dataserializer.decompressobjs[lz4] = dataserializer.lz4_decompressobj
            dataserializer.compress_openers[lz4] = dataserializer.lz4_open

        # check serialization by pickle
        try:
            sess._sess._serial_type = SerialType.PICKLE

            lhs = mt.ones((10, 10), chunk_size=30)
            rhs = mt.ones((10, 10), chunk_size=30)
            prod = lhs.dot(rhs)
            value = sess.run(prod, timeout=timeout)
            np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

            raw = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'),
                               index=pd.RangeIndex(10, 0, -1))
            data = md.DataFrame(raw).astype({'E': 'arrow_string'})
            ret_data = data.execute(session=sess).fetch(session=sess)
            self.assertEqual(ret_data.dtypes['E'], np.dtype('O'))
            pd.testing.assert_frame_equal(ret_data.astype({'E': 'float'}), raw,
                                          check_less_precise=True)

            raw = pd.Series(np.random.rand(10),
                            index=pd.RangeIndex(10, 0, -1), name='r')
            data = md.Series(raw).astype('Arrow[string]')
            ret_data = data.execute(session=sess).fetch(session=sess)
            self.assertEqual(ret_data.dtype, np.dtype('O'))
            pd.testing.assert_series_equal(ret_data.astype('float'), raw)
        finally:
            sess._sess._serial_type = SerialType.ARROW

        va = np.random.randint(0, 10000, (100, 100))
        vb = np.random.randint(0, 10000, (100, 100))
        lhs = mt.array(va, chunk_size=30)
        rhs = mt.array(vb, chunk_size=30)
        prod = lhs.dot(rhs)
        value = sess.run(prod, timeout=timeout)
        np.testing.assert_array_equal(value, va.dot(vb))

        # test fetch log
        def f():
            print('test')

        r = mr.spawn(f).execute(session=sess, timeout=timeout)
        self.assertEqual(str(r.fetch_log()).strip(), 'test')
        self.assertEqual(str(r.fetch_log(offsets=0)).strip(), 'test')
        self.assertEqual(str(r.fetch_log()).strip(), '')
        self.assertEqual(
            str(r.fetch_log(offsets='-0.003k', sizes=2)).strip(), 'st')

        graphs = sess.get_graph_states()

        # make sure status got uploaded
        time.sleep(1.5)

        # check web UI requests
        _assert_ok(service_ep)

        _assert_ok(f'{service_ep}/scheduler')
        _assert_ok(f'{service_ep}/scheduler/127.0.0.1:{self.scheduler_port}')

        _assert_ok(f'{service_ep}/worker')
        _assert_ok(f'{service_ep}/worker/127.0.0.1:{self.worker_port}')
        _assert_ok(f'{service_ep}/worker/127.0.0.1:{self.worker_port}/timeline')

        _assert_ok(f'{service_ep}/session')
        task_id = next(iter(graphs.keys()))
        _assert_ok(f'{service_ep}/session/{session_id}/graph/{task_id}')
        _assert_ok(
            f'{service_ep}/session/{session_id}/graph/{task_id}/running_nodes')

        from mars.web.task_pages import PROGRESS_APP_NAME
        _assert_ok(
            f'{service_ep}/{PROGRESS_APP_NAME}?session_id={session_id}&task_id={task_id}')

        from mars.web.worker_pages import TIMELINE_APP_NAME
        _assert_ok(
            f'{service_ep}/{TIMELINE_APP_NAME}?endpoint=127.0.0.1:{self.worker_port}')

    # make sure all chunks freed when session quits
    from mars.worker.storage import StorageManagerActor
    actor_client = new_client()
    storage_manager_ref = actor_client.actor_ref(
        StorageManagerActor.default_uid(),
        address='127.0.0.1:' + str(self.worker_port))
    self.assertSetEqual(set(storage_manager_ref.dump_keys()), set())
def f():
    """Assert the default session id matches ``session_id`` and sum a 2x3 ones tensor."""
    current_id = get_default_session().session_id
    assert current_id == session_id
    ones = mt.ones((2, 3))
    return ones.sum().to_numpy()
def testSameKeyPreparation(self, *_):
    """Prepare a graph that concatenates one tensor with itself.

    The test only enters and leaves ``prepare_graph_in_pool``; it passes
    when that context manager completes without raising.
    """
    ones = mt.ones((5, 5), chunk_size=3)
    doubled = mt.concatenate((ones, ones))
    with self.prepare_graph_in_pool(doubled, clean_io_meta=False):
        pass
def testSimpleExecution(self):
    """Enqueue and start a tiled ``ones + ones`` graph with stored inputs.

    Every ``TensorOnes`` chunk op is replaced by a ``TensorFetch``, and both
    input chunks are put into the chunk store and registered with the chunk
    holder. The graph is enqueued and started, the result is validated from
    a finish callback, and a second finish callback on the same graph key
    validates it once more.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)
        pool.create_actor(CpuCalcActor, uid='w:1:calc-a')

        import mars.tensor as mt
        from mars.tensor.expressions.datasource import TensorOnes, TensorFetch

        lhs = mt.ones((10, 8), chunk_size=10)
        rhs = mt.ones((10, 8), chunk_size=10)
        total = lhs + rhs
        graph = total.build_graph(compose=False, tiled=True)

        for node in graph:
            if not isinstance(node.op, TensorOnes):
                continue
            # turn the data source into a fetch op that keeps the same key
            node._op = TensorFetch(
                dtype=node.dtype,
                _outputs=[weakref.ref(out) for out in node.op.outputs],
                _key=node.op.key)

        # The callbacks look ``test_actor`` up when they are invoked, so
        # they always act on the actor of the currently running block.
        def _validate(_):
            stored = test_actor._chunk_store.get(session_id, total.chunks[0].key)
            assert_array_equal(stored, 2 * np.ones((10, 8)))

        def _succeed(*_):
            return test_actor.set_result(None)

        def _fail(*exc):
            return test_actor.set_result(exc, False)

        with self.run_actor_test(pool) as test_actor:
            session_id = str(uuid.uuid4())
            chunk_holder_ref = test_actor.promise_ref(
                ChunkHolderActor.default_name())

            # store and register both inputs, dropping the returned handle
            # right after each registration
            for source in (lhs, rhs):
                refs = test_actor._chunk_store.put(
                    session_id, source.chunks[0].key,
                    np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id, source.chunks[0].key)
                del refs

            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            graph_key = str(uuid.uuid4())
            execution_ref.enqueue_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[total.chunks[0].key]), None, _promise=True,
            ).then(lambda *_: execution_ref.start_execution(
                session_id, graph_key, _tell=True))

            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_validate).then(_succeed).catch(_fail)

        self.get_result()

        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True,
            ).then(_validate).then(_succeed).catch(_fail)

        self.get_result()
def testChunkSerialize(self):
    """Round-trip several kinds of chunks through the pb and json serializers.

    Covers a ``ones`` chunk, a chunk built from an in-memory ndarray, a
    hand-built fused chunk, and an input chunk of a tiled ``dot``.  For the
    pb path the serialized message fields are inspected as well as the
    deserialized object.
    """

    def _check_pb_message(op_pb, chunk_pb, source, expected_op_type):
        # Fields of the serialized pb message must mirror the source chunk.
        self.assertEqual(tuple(chunk_pb.index), source.index)
        self.assertEqual(chunk_pb.key, source.key)
        self.assertEqual(tuple(chunk_pb.shape), source.shape)
        self.assertEqual(int(op_pb.type.split('.', 1)[1]), expected_op_type)

    def _check_layout(source, restored):
        self.assertEqual(source.index, restored.index)
        self.assertEqual(source.key, restored.key)
        self.assertEqual(source.shape, restored.shape)

    def _build_fused(tiled):
        # Fuse the first two chunks of ``tiled`` into one composed chunk.
        head = tiled.chunks[0]
        tail = tiled.chunks[1]
        fuse_op = TensorFuseChunk()
        return fuse_op.new_chunk(
            head.inputs, tail.shape, _key=tail.key,
            _composed=[head.data, tail.data])

    def _check_fused(source, restored):
        self.assertEqual(source.key, restored.key)
        self.assertEqual(type(source.op), type(restored.op))
        self.assertEqual(source.composed[0].inputs[0].key,
                         restored.composed[0].inputs[0].key)
        self.assertEqual(source.inputs[-1].key, restored.inputs[-1].key)

    ones_tensor = ones((10, 3), chunk_size=(5, 2)).tiles()
    source = ones_tensor.chunks[0]

    # pb
    packed = self._pb_serial(source)
    op_pb, chunk_pb = packed[source.op, source.data]
    _check_pb_message(op_pb, chunk_pb, source, OperandDef.TENSOR_ONES)

    restored = self._pb_deserial(packed)[source.data]
    _check_layout(source, restored)
    self.assertEqual(source.op.dtype, restored.op.dtype)

    # json
    packed = self._json_serial(source)
    restored = self._json_deserial(packed)[source.data]
    _check_layout(source, restored)
    self.assertEqual(source.op.dtype, restored.op.dtype)

    data_tensor = tensor(np.random.random((10, 3)), chunk_size=(5, 2)).tiles()
    source = data_tensor.chunks[0]

    # pb
    packed = self._pb_serial(source)
    op_pb, chunk_pb = packed[source.op, source.data]
    _check_pb_message(op_pb, chunk_pb, source, OperandDef.TENSOR_DATA_SOURCE)

    restored = self._pb_deserial(packed)[source.data]
    _check_layout(source, restored)
    self.assertTrue(np.array_equal(source.op.data, restored.op.data))

    # json
    packed = self._json_serial(source)
    restored = self._json_deserial(packed)[source.data]
    _check_layout(source, restored)
    self.assertTrue(np.array_equal(source.op.data, restored.op.data))

    added_tensor = (tensor(np.random.random((10, 3)), chunk_size=(5, 2)) + 1).tiles()

    # pb
    fused = _build_fused(added_tensor)
    packed = self._pb_serial(fused)
    op_pb, chunk_pb = packed[fused.op, fused.data]

    self.assertEqual(chunk_pb.key, fused.key)
    self.assertEqual(int(op_pb.type.split('.', 1)[1]), OperandDef.FUSE)
    self.assertEqual(len(chunk_pb.composed), 2)

    _check_fused(fused, self._pb_deserial(packed)[fused.data])

    # json (a fresh fused chunk is built for this path)
    fused = _build_fused(added_tensor)
    packed = self._json_serial(fused)
    _check_fused(fused, self._json_deserial(packed)[fused.data])

    left = ones((10, 3), chunk_size=2)
    right = ones((3, 5), chunk_size=2)
    dot_input = dot(left, right).tiles().chunks[0].inputs[0]

    # pb, then json
    codecs = (
        (self._pb_serial, self._pb_deserial),
        (self._json_serial, self._json_deserial),
    )
    for encode, decode in codecs:
        packed = encode(dot_input)
        restored = decode(packed)[dot_input]
        self.assertEqual(dot_input.key, restored.key)
def testWebApi(self):
    """Drive a session against the web endpoint and probe the web UI pages.

    Runs matrix products through the session (including a resubmission of
    the same tensor), fetches a result while the LZ4 entries of the
    dataserializer registries are set to ``None``, and finally checks that
    each web UI URL answers with HTTP 200.
    """
    service_ep = 'http://127.0.0.1:' + self.web_port
    if 'CI' in os.environ:
        timeout = 120
    else:
        timeout = -1

    with new_session(service_ep) as sess:
        self.assertEqual(sess.count_workers(), 1)

        lhs = mt.ones((100, 100), chunk_size=30)
        rhs = mt.ones((100, 100), chunk_size=30)
        product = lhs.dot(rhs)
        first_result = sess.run(product, timeout=timeout)
        assert_array_equal(first_result, np.ones((100, 100)) * 100)

        # check resubmission
        second_result = sess.run(product, timeout=timeout)
        assert_array_equal(first_result, second_result)

        # check when local compression libs are missing
        from mars.serialize import dataserializer
        try:
            small_lhs = mt.ones((10, 10), chunk_size=30)
            small_rhs = mt.ones((10, 10), chunk_size=30)
            small_product = small_lhs.dot(small_rhs)
            small_result = sess.run(small_product, timeout=timeout)
            assert_array_equal(small_result, np.ones((10, 10)) * 10)

            # Blank out the LZ4 handlers in every registry before fetching.
            for registry in (dataserializer.decompressors,
                             dataserializer.decompressobjs,
                             dataserializer.compress_openers):
                registry[dataserializer.CompressType.LZ4] = None

            assert_array_equal(sess.fetch(small_product), np.ones((10, 10)) * 10)
        finally:
            # Put the module's LZ4 handlers back regardless of the outcome.
            lz4 = dataserializer.CompressType.LZ4
            dataserializer.decompressors[lz4] = dataserializer.lz4_decompress
            dataserializer.decompressobjs[lz4] = dataserializer.lz4_decompressobj
            dataserializer.compress_openers[lz4] = dataserializer.lz4_open

        raw_lhs = np.random.randint(0, 10000, (100, 100))
        raw_rhs = np.random.randint(0, 10000, (100, 100))
        lhs = mt.array(raw_lhs, chunk_size=30)
        rhs = mt.array(raw_rhs, chunk_size=30)
        product = lhs.dot(rhs)
        result = sess.run(product, timeout=timeout)
        assert_array_equal(result, raw_lhs.dot(raw_rhs))

        graphs = sess.get_graph_states()

        # check web UI requests
        page_urls = [
            service_ep,
            '%s/task' % (service_ep, ),
            '%s/scheduler' % (service_ep, ),
            '%s/scheduler?endpoint=127.0.0.1:%s' % (service_ep, self.scheduler_port),
            '%s/worker' % (service_ep, ),
            '%s/worker?endpoint=127.0.0.1:%s' % (service_ep, self.worker_port),
            '%s/task' % (service_ep, ),
        ]
        for url in page_urls:
            response = requests.get(url)
            self.assertEqual(response.status_code, 200)

        task_id = next(iter(graphs.keys()))
        response = requests.get('%s/task?session_id=%s&task_id=%s'
                                % (service_ep, sess._session_id, task_id))
        self.assertEqual(response.status_code, 200)