示例#1
0
    def test_is_failed(self):
        pr_success = RunProcsResult(return_values={0: "a", 1: "b"})
        self.assertFalse(pr_success.is_failed())

        fail0 = ProcessFailure(
            local_rank=0, pid=998, exitcode=1, error_file="ignored.json"
        )
        pr_fail = RunProcsResult(failures={0: fail0})
        self.assertTrue(pr_fail.is_failed())
示例#2
0
    def test_get_failures(self, log_mock):
        """Check the ``timestamp`` that each ``ProcessFailure`` ends up with.

        ``time.time`` is patched to yield 3, 2 and 1 on successive calls. Two
        error files are written with ``_write_error`` and a third failure is
        pointed at ``"no_exist.json"``; the failures for local ranks 0, 1 and
        2 are then expected to carry timestamps 3, 2 and 1 respectively.

        Args:
            log_mock: not referenced in the body; presumably injected by a
                patch decorator outside this view (TODO confirm).
        """
        with mock.patch("time.time", side_effect=[3, 2, 1]):
            first_error_path = os.path.join(self.test_dir, "error0.json")
            second_error_path = os.path.join(self.test_dir, "error1.json")
            _write_error(RuntimeError("error 0"), first_error_path)
            _write_error(RuntimeError("error 1"), second_error_path)

            # (local_rank, pid, exitcode, error_file), kept in rank order:
            # the patched clock returns a different value on every call, so
            # the construction order must not change.
            specs = (
                (0, 997, 1, first_error_path),
                (1, 998, 3, second_error_path),
                (2, 999, 15, "no_exist.json"),
            )
            failures = [
                ProcessFailure(
                    local_rank=rank, pid=pid, exitcode=code, error_file=path
                )
                for rank, pid, code, path in specs
            ]

            for expected_timestamp, failure in zip((3, 2, 1), failures):
                self.assertEqual(expected_timestamp, failure.timestamp)
示例#3
0
    def test_get_failures(self):

        error_file0 = os.path.join(self.test_dir, "error0.json")
        error_file1 = os.path.join(self.test_dir, "error1.json")
        eh = ErrorHandler()
        with mock.patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": error_file0}):
            eh.record_exception(RuntimeError("error 0"))

        with mock.patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": error_file0}):
            eh.record_exception(RuntimeError("error 1"))

        fail0 = ProcessFailure(
            local_rank=0, pid=997, exitcode=1, error_file=error_file0
        )
        fail1 = ProcessFailure(
            local_rank=1, pid=998, exitcode=3, error_file=error_file1
        )
        fail2 = ProcessFailure(
            local_rank=2, pid=999, exitcode=15, error_file="no_exist.json"
        )

        self.assertLessEqual(fail0.timestamp, fail1.timestamp)
        self.assertLessEqual(fail1.timestamp, fail2.timestamp)