Exemplo n.º 1
0
    def testDistributedContext(self):
        """Verify DistributedContext metadata and data-fetch APIs against a
        named tensor executed through a regular session.

        Covers: named-tileable lookup, nsplits metadata, full fetch, and
        slice / fancy-index partial fetches.
        """
        self.start_processes(etcd=False)
        sess = new_session(self.session_manager_ref.address)
        rand_state = np.random.RandomState(0)
        ctx = DistributedContext(
            scheduler_address=self.session_manager_ref.address,
            session_id=sess.session_id)

        source = rand_state.rand(10, 10)
        tensor = mt.tensor(source, chunk_size=4)
        tensor.execute(session=sess, timeout=self.timeout, name='test')

        # The name given at execute time must resolve to this tensor.
        infos = ctx.get_named_tileable_infos('test')
        self.assertEqual(tensor.key, infos.tileable_key)
        self.assertEqual(tensor.shape, infos.tileable_shape)

        # chunk_size=4 over a 10x10 tensor yields (4, 4, 2) splits per axis.
        metas = ctx.get_tileable_metas([tensor.key], filter_fields=['nsplits'])
        self.assertEqual(((4, 4, 2), (4, 4, 2)), metas[0][0])

        # Full fetch round-trips the original data.
        fetched = ctx.get_tileable_data(tensor.key)
        np.testing.assert_array_equal(source, fetched)

        # Slice-based and mixed fancy/slice partial fetches.
        for idx in ([slice(3, 9), slice(0, 7)],
                    [[1, 4, 2, 4, 5], slice(None, None, None)]):
            fetched = ctx.get_tileable_data(tensor.key, idx)
            np.testing.assert_array_equal(source[tuple(idx)], fetched)

        # Point-wise fancy indexing on both axes.
        fancy = ([9, 1, 2, 0], [0, 0, 4, 4])
        fetched = ctx.get_tileable_data(tensor.key, fancy)
        np.testing.assert_array_equal(source[[9, 1, 2, 0], [0, 0, 4, 4]], fetched)
Exemplo n.º 2
0
    def testDistributedSampler(self, *_):
        """Check MarsDistributedSampler produces indices consistent with
        both batch (_get_data) and per-item (__getitem__) dataset access.
        """
        web_ep = 'http://127.0.0.1:' + self.web_port
        sched_ep = '127.0.0.1:' + self.scheduler_port
        with new_session(web_ep) as sess:
            src1 = np.random.rand(100, 200)
            data1 = mt.tensor(src1, chunk_size=40)
            data1.execute(name='data1', session=sess)

            src2 = np.random.rand(100,)
            data2 = mt.tensor(src2, chunk_size=60)
            data2.execute(name='data2', session=sess)

            with DistributedContext(scheduler_address=sched_ep,
                                    session_id=sess.session_id):
                dataset = MarsDataset('data1', 'data2')
                self.assertEqual(len(dataset), 100)

                sampler = MarsDistributedSampler(dataset, num_replicas=1, rank=0)
                indices = sampler.generate_indices()
                # Batch fetch and per-item iteration must agree for both
                # underlying named tensors.
                for pos in (0, 1):
                    batch = np.array(dataset._get_data(indices)[pos])
                    itemwise = np.array([dataset[ind][pos] for ind in sampler])
                    np.testing.assert_array_equal(batch, itemwise)

                self.assertEqual(len(sampler), 100)

                # Epoch setter should be reflected on the sampler.
                sampler.set_epoch(1)
                self.assertEqual(sampler.epoch, 1)
Exemplo n.º 3
0
    def testDistributedContext(self):
        """Exercise DistributedContext metadata/data APIs against a graph
        submitted directly through the session actor (low-level path,
        bypassing the high-level session execute API).
        """
        self.start_processes(etcd=False)

        session_id = uuid.uuid1()
        actor_client = new_client()
        rs = np.random.RandomState(0)  # seeded for reproducible data

        context = DistributedContext(
            scheduler_address=self.scheduler_endpoints[0],
            session_id=session_id)

        # Create the session on the scheduler and obtain an actor ref to it.
        session_ref = actor_client.actor_ref(
            self.session_manager_ref.create_session(session_id))
        raw1 = rs.rand(10, 10)
        a = mt.tensor(raw1, chunk_size=4)

        # Serialize and submit the tileable graph manually, naming the
        # result 'test' so it can be looked up through the context later.
        graph = a.build_graph()
        targets = [a.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets,
                                          names=['test'])

        # Block until the graph terminates, then assert it succeeded.
        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        # Named-tileable lookup should resolve to the submitted tensor.
        tileable_infos = context.get_named_tileable_infos('test')
        self.assertEqual(a.key, tileable_infos.tileable_key)
        self.assertEqual(a.shape, tileable_infos.tileable_shape)

        # chunk_size=4 over a 10x10 tensor splits each axis as (4, 4, 2).
        nsplits = context.get_tileable_metas([a.key],
                                             filter_fields=['nsplits'])[0][0]
        self.assertEqual(((4, 4, 2), (4, 4, 2)), nsplits)

        # Full fetch round-trips the original data.
        r = context.get_tileable_data(a.key)
        np.testing.assert_array_equal(raw1, r)

        # Slice-based partial fetch.
        indexes = [slice(3, 9), slice(0, 7)]
        r = context.get_tileable_data(a.key, indexes)
        np.testing.assert_array_equal(raw1[tuple(indexes)], r)

        # Mixed fancy-index / slice partial fetch.
        indexes = [[1, 4, 2, 4, 5], slice(None, None, None)]
        r = context.get_tileable_data(a.key, indexes)
        np.testing.assert_array_equal(raw1[tuple(indexes)], r)

        # Point-wise fancy indexing on both axes.
        indexes = ([9, 1, 2, 0], [0, 0, 4, 4])
        r = context.get_tileable_data(a.key, indexes)
        np.testing.assert_array_equal(raw1[[9, 1, 2, 0], [0, 0, 4, 4]], r)
Exemplo n.º 4
0
    def testDistributedRunPyTorchScript(self):
        """Check that a MarsDataset over a named tensor supports list-based
        fancy indexing that matches the source numpy array.
        """
        web_ep = 'http://127.0.0.1:' + self.web_port
        sched_ep = '127.0.0.1:' + self.scheduler_port
        with new_session(web_ep) as sess:
            source = np.random.rand(100, 200)
            tensor = mt.tensor(source, chunk_size=40)
            tensor.execute(name='data', session=sess)

            with DistributedContext(scheduler_address=sched_ep,
                                    session_id=sess.session_id):
                dataset = MarsDataset('data')
                self.assertEqual(len(dataset), 100)

                # Fancy indexing with a plain list of row positions.
                picks = [2, 5, 7, 9, 10]
                fetched = dataset[picks][0]
                np.testing.assert_array_equal(source[picks], fetched)
Exemplo n.º 5
0
    def testDistributedDataset(self):
        """Check MarsDataset random fancy indexing and per-item access
        after an explicit prefetch.
        """
        web_ep = 'http://127.0.0.1:' + self.web_port
        sched_ep = '127.0.0.1:' + self.scheduler_port
        with new_session(web_ep) as sess:
            source = np.random.rand(100, 200)
            tensor = mt.tensor(source, chunk_size=40)
            tensor.execute(name='data', session=sess)

            with DistributedContext(scheduler_address=sched_ep,
                                    session_id=sess.session_id):
                dataset = MarsDataset('data')
                self.assertEqual(len(dataset), 100)

                # Batch fancy indexing with random row positions.
                picks = np.random.randint(0, 100, (10,))
                np.testing.assert_array_equal(source[picks], dataset[picks][0])

                # Prefetch a batch, then fetch item-by-item.
                picks = np.random.randint(0, 100, (10,))
                dataset.prefetch(picks)
                itemwise = np.array([dataset[ind][0] for ind in picks])
                np.testing.assert_array_equal(source[picks], itemwise)
Exemplo n.º 6
0
    def testFetchLogWithoutEtcd(self):
        """End-to-end test of remote-function stdout log capture and the
        fetch-log APIs, on a cluster started without etcd.

        Covers: raw log file on disk, DistributedContext.fetch_tileable_op_logs,
        ExecutableTuple.fetch_log with positive/negative/string offsets,
        nested spawn restrictions, and DataFrame.map_chunk log capture.
        """
        # test fetch log
        with tempfile.TemporaryDirectory() as temp_dir:
            # Route scheduler custom logs into a temp dir we can inspect.
            self.start_processes(
                etcd=False,
                modules=['mars.scheduler.tests.integrated.no_prepare_op'],
                scheduler_args=[f'-Dcustom_log_dir={temp_dir}'])
            sess = new_session(self.session_manager_ref.address)

            def f():
                print('test')

            r = spawn(f)
            r.execute(session=sess)

            # Resolve the actor that tracks where each op's log file lives.
            custom_log_actor = sess._api.actor_client.actor_ref(
                CustomLogMetaActor.default_uid(),
                address=self.cluster_info.get_scheduler(
                    CustomLogMetaActor.default_uid()))

            # Exactly one chunk ran, so exactly one log path is recorded;
            # the on-disk file must contain the printed text.
            chunk_key_to_log_path = custom_log_actor.get_tileable_op_log_paths(
                sess.session_id, r.op.key)
            paths = list(chunk_key_to_log_path.values())
            self.assertEqual(len(paths), 1)
            log_path = paths[0][1]
            with open(log_path) as f:
                self.assertEqual(f.read().strip(), 'test')

            # Same content must be reachable through the context API ...
            context = DistributedContext(
                scheduler_address=self.session_manager_ref.address,
                session_id=sess.session_id)
            log_result = context.fetch_tileable_op_logs(r.op.key)
            log = next(iter(log_result.values()))['log']
            self.assertEqual(log.strip(), 'test')

            # ... and through the high-level fetch_log helper.
            log = r.fetch_log()
            self.assertEqual(str(log).strip(), 'test')

            # test multiple functions
            def f1(size):
                print('f1' * size)
                sys.stdout.flush()

            # Positive offset/size window into each function's output.
            fs = ExecutableTuple([spawn(f1, 30), spawn(f1, 40)])
            fs.execute(session=sess)
            log = fs.fetch_log(offsets=20, sizes=10)
            self.assertEqual(str(log[0]).strip(), ('f1' * 30)[20:30])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40)[20:30])
            self.assertGreater(len(log[0].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            # test negative offsets
            # (window counted from the end; print appends '\n')
            log = fs.fetch_log(offsets=-20, sizes=10)
            self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            # test negative offsets which represented in string
            # ('-0.02K' == -20 bytes, '0.01K' == 10 bytes)
            log = fs.fetch_log(offsets='-0.02K', sizes='0.01K')
            self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            def test_nested():
                print('level0')
                fr = spawn(f1, 1)
                fr.execute()
                print(fr.fetch_log())

            # fetch_log before execution must raise; after execution the
            # outer log includes both the outer print and the nested log.
            r = spawn(test_nested)
            with self.assertRaises(ValueError):
                r.fetch_log()
            r.execute(session=sess)
            log = str(r.fetch_log())
            self.assertIn('level0', log)
            self.assertIn('f1', log)

            df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

            def df_func(c):
                print('df func')
                return c

            # map_chunk output is captured per chunk; an un-executed /
            # non-printing tileable yields an empty log.
            df2 = df.map_chunk(df_func)
            df2.execute(session=sess)
            log = df2.fetch_log()
            self.assertIn('Chunk op key:', str(log))
            self.assertIn('df func', repr(log))
            self.assertEqual(len(str(df.fetch_log(session=sess))), 0)
Exemplo n.º 7
0
    def testFetchLogWithoutEtcd(self):
        """End-to-end test of remote-function stdout log capture and the
        fetch-log APIs, on a cluster started without etcd.

        Covers: raw log file on disk, DistributedContext.fetch_tileable_op_logs,
        ExecutableTuple.fetch_log with positive/negative/string offsets,
        nested spawn behavior, DataFrame.map_chunk log capture, and prints
        made from a worker-side thread.
        """
        # test fetch log
        with tempfile.TemporaryDirectory() as temp_dir:
            # Route scheduler custom logs into a temp dir we can inspect.
            self.start_processes(etcd=False, modules=['mars.scheduler.tests.integrated.no_prepare_op'],
                                 scheduler_args=[f'-Dcustom_log_dir={temp_dir}'])
            sess = new_session(self.session_manager_ref.address)

            def f():
                print('test')

            r = spawn(f)
            r.execute(session=sess)

            # Resolve the actor that tracks where each op's log file lives.
            custom_log_actor = sess._api.actor_client.actor_ref(
                CustomLogMetaActor.default_uid(),
                address=self.cluster_info.get_scheduler(CustomLogMetaActor.default_uid())
            )

            # Exactly one chunk ran, so exactly one log path is recorded;
            # the on-disk file must contain the printed text.
            chunk_key_to_log_path = custom_log_actor.get_tileable_op_log_paths(
                sess.session_id, r.op.key)
            paths = list(chunk_key_to_log_path.values())
            self.assertEqual(len(paths), 1)
            log_path = paths[0][1]
            with open(log_path) as f:
                self.assertEqual(f.read().strip(), 'test')

            # Same content must be reachable through the context API ...
            context = DistributedContext(scheduler_address=self.session_manager_ref.address,
                                         session_id=sess.session_id)
            log_result = context.fetch_tileable_op_logs(r.op.key)
            log = next(iter(log_result.values()))['log']
            self.assertEqual(log.strip(), 'test')

            # ... and through the high-level fetch_log helper.
            log = r.fetch_log()
            self.assertEqual(str(log).strip(), 'test')

            # test multiple functions
            def f1(size):
                print('f1' * size)
                sys.stdout.flush()

            # Positive offset/size window into each function's output.
            fs = ExecutableTuple([spawn(f1, 30), spawn(f1, 40)])
            fs.execute(session=sess)
            log = fs.fetch_log(offsets=20, sizes=10)
            self.assertEqual(str(log[0]).strip(), ('f1' * 30)[20:30])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40)[20:30])
            self.assertGreater(len(log[0].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            # test negative offsets
            # (window counted from the end; print appends '\n')
            log = fs.fetch_log(offsets=-20, sizes=10)
            self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            # test negative offsets which represented in string
            # ('-0.02K' == -20 bytes, '0.01K' == 10 bytes)
            log = fs.fetch_log(offsets='-0.02K', sizes='0.01K')
            self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
            self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
            self.assertTrue(all(s > 0 for s in log[0].offsets))
            self.assertGreater(len(log[1].offsets), 0)
            self.assertTrue(all(s > 0 for s in log[1].offsets))
            self.assertGreater(len(log[0].chunk_op_keys), 0)

            def test_nested():
                print('level0')
                fr = spawn(f1, 1)
                fr.execute()
                print(fr.fetch_log())

            # fetch_log before execution must raise; after execution the
            # outer log includes both the outer print and the nested log.
            r = spawn(test_nested)
            with self.assertRaises(ValueError):
                r.fetch_log()
            r.execute(session=sess)
            log = str(r.fetch_log())
            self.assertIn('level0', log)
            self.assertIn('f1', log)

            df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

            def df_func(c):
                print('df func')
                return c

            # map_chunk output is captured per chunk; an un-executed /
            # non-printing tileable yields an empty log.
            df2 = df.map_chunk(df_func)
            df2.execute(session=sess)
            log = df2.fetch_log()
            self.assertIn('Chunk op key:', str(log))
            self.assertIn('df func', repr(log))
            self.assertEqual(len(str(df.fetch_log(session=sess))), 0)

            def test_host(rndf):
                rm = spawn(nested, rndf)
                rm.execute()
                print(rm.fetch_log())

            def nested(_rndf):
                print('log_content')

            # Several hosts, each spawning a nested remote function; every
            # host's log must surface the nested function's output.
            ds = [spawn(test_host, n, retry_when_fail=False)
                  for n in np.random.rand(4)]
            xtp = ExecutableTuple(ds)
            xtp.execute(session=sess)
            for log in xtp.fetch_log(session=sess):
                self.assertEqual(str(log).strip(), 'log_content')

            def test_threaded():
                import threading

                exc_info = None

                def print_fun():
                    nonlocal exc_info
                    try:
                        print('inner')
                    except:  # noqa: E722  # nosec  # pylint: disable=bare-except
                        exc_info = sys.exc_info()

                # Print from a secondary thread; any exception raised there
                # is re-raised in the main thread so the test fails loudly.
                print_thread = threading.Thread(target=print_fun)
                print_thread.start()
                print_thread.join()

                if exc_info is not None:
                    raise exc_info[1].with_traceback(exc_info[-1])

                print('after')

            # Output from both the worker thread and the main thread must
            # be captured in order.
            rm = spawn(test_threaded)
            rm.execute(session=sess)
            logs = str(rm.fetch_log(session=sess)).strip()
            self.assertEqual(logs, 'inner\nafter')