Python start_methods示例，torchelastic.test.test_utils.start_methods Python示例

示例#1

0

显示文件

    def test_function_large_ret_val(self):
        """Check that workers returning very large values can still be collected.

        Return values travel back through ``multiprocessing`` queues, which
        are backed by pipes. When a single object is larger than the pipe
        buffer, the writing process blocks until the reader starts draining
        the pipe, so this test has every worker return a large payload.
        """
        size = 200000
        for start_method in start_methods():
            with self.subTest(start_method=start_method):
                # Four local ranks, all with the same payload size and an
                # empty environment.
                pc = start_processes(
                    name="echo",
                    entrypoint=echo_large,
                    args={rank: (size,) for rank in range(4)},
                    envs={rank: {} for rank in range(4)},
                    log_dir=self.log_dir(),
                    start_method=start_method,
                )

                results = pc.wait(period=0.1)
                # Every rank must hand back a value of exactly `size` items.
                for rank in range(pc.nprocs):
                    self.assertEqual(size, len(results.return_values[rank]))

示例#2

0

显示文件

文件： api_test.py 项目： rejinjoy18/elastic

    def test_function_signal(self):
        """
        Run two copies of ``echo3`` and induce a segfault in the first one.

        For every (start_method, redirects) combination the test expects:
        exactly one recorded failure, no return values, an exit code of
        ``-SIGSEGV`` reported under the signal name ``"SIGSEGV"``, the
        failure attributed to local rank 0's pid, and the error file located
        at ``<log_dir>/0/error.json``.
        """
        SEGFAULT = True
        for start_method, redirs in product(start_methods(), redirects()):
            # Label the subtest with both loop parameters; with only
            # start_method, failures for different redirect settings would be
            # indistinguishable in the test report.
            with self.subTest(start_method=start_method, redirs=redirs):
                log_dir = self.log_dir()
                pc = start_processes(
                    name="echo",
                    entrypoint=echo3,
                    args={0: ("hello", SEGFAULT), 1: ("world",)},
                    envs={0: {}, 1: {}},
                    log_dir=log_dir,
                    start_method=start_method,
                    redirects=redirs,
                )

                results = pc.wait(period=0.1)

                # No worker process may be left behind after the failure.
                self.assert_pids_noexist(pc.pids())
                self.assertEqual(1, len(results.failures))
                self.assertFalse(results.return_values)

                failure = results.failures[0]
                error_file = failure.error_file

                # A process killed by a signal reports the negated signal
                # number as its exit code.
                self.assertEqual(-signal.SIGSEGV, failure.exitcode)
                self.assertEqual("SIGSEGV", failure.signal_name())
                self.assertEqual(pc.pids()[0], failure.pid)
                self.assertEqual(os.path.join(log_dir, "0", "error.json"), error_file)

示例#3

0

显示文件

文件： api_test.py 项目： rejinjoy18/elastic

    def test_function(self):
        """Run two copies of ``echo1`` under every start method and redirect mode.

        Verifies the per-rank return values (``"hello_<rank>"``) and that
        stdout/stderr log files are reported only for the streams that were
        redirected, containing the expected greeting line when they are.
        """
        for start_method, redirs in product(start_methods(), redirects()):
            with self.subTest(start_method=start_method, redirs=redirs):
                pc = start_processes(
                    name="echo",
                    entrypoint=echo1,
                    args={0: ("hello",), 1: ("hello",)},
                    envs={0: {"RANK": "0"}, 1: {"RANK": "1"}},
                    log_dir=self.log_dir(),
                    start_method=start_method,
                    redirects=redirs,
                )

                results = pc.wait(period=0.1)
                nprocs = pc.nprocs

                self.assert_pids_noexist(pc.pids())
                expected_returns = {rank: f"hello_{rank}" for rank in range(nprocs)}
                self.assertEqual(expected_returns, results.return_values)

                # Whether each stream is part of the requested redirect mask.
                out_redirected = (redirs & Std.OUT) == Std.OUT
                err_redirected = (redirs & Std.ERR) == Std.ERR

                for rank in range(nprocs):
                    # Streams that were not redirected must have no log file.
                    if not out_redirected:
                        self.assertFalse(results.stdouts[rank])
                    if not err_redirected:
                        self.assertFalse(results.stderrs[rank])
                    # Redirected streams must contain the worker's greeting.
                    if out_redirected:
                        self.assert_in_file(
                            [f"hello stdout from {rank}"], results.stdouts[rank]
                        )
                    if err_redirected:
                        self.assert_in_file(
                            [f"hello stderr from {rank}"], results.stderrs[rank]
                        )

示例#4

0

显示文件

    def test_function_exit(self):
        """
        Run two copies of ``echo1`` and make the first one exit with ``FAIL``.

        A function that exits from Python does not produce an error file
        (even when decorated with ``@record``), so the failure is expected to
        carry ``"<N/A>"`` for both the error file and the signal name, plus a
        generic "Process failed with exitcode" message. Only rank 0's stderr
        is redirected; every other stream must be empty.
        """

        FAIL = 138
        for start_method in start_methods():
            with self.subTest(start_method=start_method):
                log_dir = self.log_dir()
                pc = start_processes(
                    name="echo",
                    entrypoint=echo1,
                    args={0: ("hello", FAIL), 1: ("hello",)},
                    envs={0: {"RANK": "0"}, 1: {"RANK": "1"}},
                    log_dir=log_dir,
                    start_method=start_method,
                    redirects={0: Std.ERR},
                )

                results = pc.wait(period=0.1)

                # Overall outcome: all workers gone, one failure, no results.
                self.assert_pids_noexist(pc.pids())
                self.assertTrue(results.is_failed())
                self.assertEqual(1, len(results.failures))
                self.assertFalse(results.return_values)

                first_failure = results.failures[0]
                reported_error_file = first_failure.error_file

                # Details of the failure recorded for local rank 0.
                self.assertEqual(FAIL, first_failure.exitcode)
                self.assertEqual("<N/A>", first_failure.signal_name())
                self.assertEqual(pc.pids()[0], first_failure.pid)
                self.assertEqual("<N/A>", reported_error_file)
                self.assertEqual(
                    f"Process failed with exitcode {FAIL}", first_failure.message
                )
                self.assertLessEqual(first_failure.timestamp, int(time.time()))

                # Only rank 0's stderr was redirected to a file.
                self.assert_in_file([f"exit {FAIL} from 0"], results.stderrs[0])
                self.assertFalse(results.stdouts[0])
                self.assertFalse(results.stderrs[1])
                self.assertFalse(results.stdouts[1])

                # The log tailers must have been shut down.
                self.assertTrue(pc._stderr_tail.stopped())
                self.assertTrue(pc._stdout_tail.stopped())

示例#5

0

显示文件

文件： api_test.py 项目： rejinjoy18/elastic

    def test_void_function(self):
        """Run two copies of ``echo0`` and expect ``None`` back from each rank."""
        # One ``None`` return value per local rank.
        expected_returns = {0: None, 1: None}
        for start_method in start_methods():
            with self.subTest(start_method=start_method):
                pc = start_processes(
                    name="echo",
                    entrypoint=echo0,
                    args={0: ("hello",), 1: ("world",)},
                    envs={0: {}, 1: {}},
                    log_dir=self.log_dir(),
                    start_method=start_method,
                )

                outcome = pc.wait(period=0.1)
                self.assertEqual(expected_returns, outcome.return_values)

示例#6

0

显示文件

    def test_function_raise(self):
        """
        Run two copies of ``echo2`` and raise an exception in the first one.

        The failure is expected to carry exit code 1, no signal name, rank
        0's pid, an error file at ``<log_dir>/0/error.json``, and a timestamp
        equal to the one stored in that error file.
        """
        RAISE = True

        for start_method in start_methods():
            with self.subTest(start_method=start_method):
                log_dir = self.log_dir()
                pc = start_processes(
                    name="echo",
                    entrypoint=echo2,
                    args={0: ("hello", RAISE), 1: ("world",)},
                    envs={0: {}, 1: {}},
                    log_dir=log_dir,
                    start_method=start_method,
                )

                results = pc.wait(period=0.1)

                self.assert_pids_noexist(pc.pids())
                self.assertEqual(1, len(results.failures))
                self.assertFalse(results.return_values)

                first_failure = results.failures[0]
                reported_error_file = first_failure.error_file
                error_payload = first_failure.error_file_data

                self.assertEqual(1, first_failure.exitcode)
                self.assertEqual("<N/A>", first_failure.signal_name())
                self.assertEqual(pc.pids()[0], first_failure.pid)

                expected_error_file = os.path.join(log_dir, "0", "error.json")
                self.assertEqual(expected_error_file, reported_error_file)

                # The failure's timestamp must match the one in the error file.
                recorded_timestamp = int(
                    error_payload["message"]["extraInfo"]["timestamp"]
                )
                self.assertEqual(recorded_timestamp, int(first_failure.timestamp))

                # The log tailers must have been shut down.
                self.assertTrue(pc._stderr_tail.stopped())
                self.assertTrue(pc._stdout_tail.stopped())

示例#7

0

显示文件

    def test_function_redirect_and_tee(self):
        """Run two copies of ``echo1`` with mixed ``redirects`` and ``tee`` settings.

        Rank 0 redirects stderr and tees stdout; rank 1 redirects nothing and
        tees stderr. The test expects log files for rank 0's stdout and
        stderr and for rank 1's stderr, no log file for rank 1's stdout, and
        both log tailers stopped once ``wait()`` returns.
        """
        for start_method in start_methods():
            with self.subTest(start_method=start_method):
                log_dir = self.log_dir()
                pc = start_processes(
                    name="trainer",
                    entrypoint=echo1,
                    args={0: ("hello",), 1: ("world",)},
                    envs={0: {"RANK": "0"}, 1: {"RANK": "1"}},
                    log_dir=log_dir,
                    # Use the loop variable: the previous hard-coded "fork"
                    # made every subtest run the same start method regardless
                    # of its label.
                    start_method=start_method,
                    redirects={0: Std.ERR, 1: Std.NONE},
                    tee={0: Std.OUT, 1: Std.ERR},
                )

                result = pc.wait()

                self.assertFalse(result.is_failed())
                self.assert_in_file(["hello stdout from 0"], pc.stdouts[0])
                self.assert_in_file(["hello stderr from 0"], pc.stderrs[0])
                self.assert_in_file(["world stderr from 1"], pc.stderrs[1])
                # Rank 1's stdout was neither redirected nor tee'd.
                self.assertFalse(pc.stdouts[1])
                self.assertTrue(pc._stderr_tail.stopped())
                self.assertTrue(pc._stdout_tail.stopped())