def test_is_failed(self):
    """Check ``RunProcsResult.is_failed()`` for a success and a failure case.

    A result constructed only from ``return_values`` must report that it has
    not failed; a result constructed with a ``failures`` entry must report
    that it has.
    """
    # Only return values supplied -> must not be reported as failed.
    self.assertFalse(RunProcsResult(return_values={0: "a", 1: "b"}).is_failed())

    # A single failure entry keyed by local rank 0 -> must be reported as failed.
    rank0_failure = ProcessFailure(
        local_rank=0, pid=998, exitcode=1, error_file="ignored.json"
    )
    failed_result = RunProcsResult(failures={0: rank0_failure})
    self.assertTrue(failed_result.is_failed())
def test_get_failures(self, log_mock):
    """Check the timestamps of three ``ProcessFailure`` objects under a fake clock.

    ``time.time`` is patched to yield 3, 2 and 1 on successive calls; the
    failures for local ranks 0, 1 and 2 are then expected to carry exactly
    those timestamps, in that order.

    Args:
        log_mock: not used in the body; presumably injected by a patch
            decorator that sits outside this view -- TODO confirm.
    """
    with mock.patch("time.time", side_effect=[3, 2, 1]):
        rank0_path = os.path.join(self.test_dir, "error0.json")
        rank1_path = os.path.join(self.test_dir, "error1.json")

        # Ranks 0 and 1 get an error file written up front; rank 2 is pointed
        # at a file name that is never written here.
        _write_error(RuntimeError("error 0"), rank0_path)
        _write_error(RuntimeError("error 1"), rank1_path)

        # Construction order is significant: the patched clock hands out its
        # values strictly in call order.
        failures = (
            ProcessFailure(local_rank=0, pid=997, exitcode=1, error_file=rank0_path),
            ProcessFailure(local_rank=1, pid=998, exitcode=3, error_file=rank1_path),
            ProcessFailure(
                local_rank=2, pid=999, exitcode=15, error_file="no_exist.json"
            ),
        )

        for expected_ts, failure in zip((3, 2, 1), failures):
            self.assertEqual(expected_ts, failure.timestamp)
def test_get_failures(self):
    """Check that ``ProcessFailure`` timestamps are non-decreasing across ranks.

    Two exceptions are recorded through an ``ErrorHandler``, each while
    ``TORCHELASTIC_ERROR_FILE`` points at that rank's own error file. Three
    ``ProcessFailure`` objects are then built: ranks 0 and 1 reference the
    recorded files, rank 2 references a file name that is never written.
    The test asserts ``fail0.timestamp <= fail1.timestamp <= fail2.timestamp``.
    """
    error_file0 = os.path.join(self.test_dir, "error0.json")
    error_file1 = os.path.join(self.test_dir, "error1.json")
    eh = ErrorHandler()

    with mock.patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": error_file0}):
        eh.record_exception(RuntimeError("error 0"))

    # Fix: this block previously reused error_file0, so "error 1" went to
    # rank 0's file and error_file1 was never created, leaving rank 1 with
    # no recorded error of its own.
    with mock.patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": error_file1}):
        eh.record_exception(RuntimeError("error 1"))

    fail0 = ProcessFailure(
        local_rank=0, pid=997, exitcode=1, error_file=error_file0
    )
    fail1 = ProcessFailure(
        local_rank=1, pid=998, exitcode=3, error_file=error_file1
    )
    # Rank 2 deliberately references a file this test never writes.
    fail2 = ProcessFailure(
        local_rank=2, pid=999, exitcode=15, error_file="no_exist.json"
    )

    # Errors were recorded / failures created in rank order, so timestamps
    # must not go backwards.
    self.assertLessEqual(fail0.timestamp, fail1.timestamp)
    self.assertLessEqual(fail1.timestamp, fail2.timestamp)