def test_dump_error_file_overwrite_existing(self):
    """Dump a source error file onto a destination that already has content.

    Both files are first written with different errors; after
    ``dump_error_file`` the destination must compare equal to the source.
    """
    target_path = os.path.join(self.test_dir, "dst_error.json")
    source_path = os.path.join(self.test_dir, "src_error.json")

    # Seed the two files with different errors so a match can only come
    # from the dump below.
    _write_error(RuntimeError("foo"), target_path)
    _write_error(RuntimeError("bar"), source_path)

    env_override = {"TORCHELASTIC_ERROR_FILE": target_path}
    with patch.dict(os.environ, env_override):
        handler = ErrorHandler()
        handler.dump_error_file(source_path)
        files_match = filecmp.cmp(source_path, target_path)
        self.assertTrue(files_match)
def failure_with_error_file(self, exception):
    """Record ``exception`` into ``self.test_error_file`` and describe the failure.

    Args:
        exception: The exception handed to ``ErrorHandler.record_exception``.

    Returns:
        A ``ProcessFailure`` for local rank 0 (pid 997, exit code 1) whose
        ``error_file`` is ``self.test_error_file``.
    """
    error_path = self.test_error_file
    env_override = {"TORCHELASTIC_ERROR_FILE": error_path}

    # The environment variable is only patched while the exception is recorded.
    with mock.patch.dict(os.environ, env_override):
        handler = ErrorHandler()
        handler.record_exception(exception)

    return ProcessFailure(
        local_rank=0,
        pid=997,
        exitcode=1,
        error_file=error_path,
    )
def raise_child_failure_error_fn(name, child_error_file=""):
    """Raise a ``ChildFailedError`` carrying one failure for local rank 0.

    Args:
        name: First argument passed to ``ChildFailedError``.
        child_error_file: Optional error file path. When non-empty, a
            ``SentinelError("foobar")`` is recorded into it before raising.

    Raises:
        ChildFailedError: Always, with ``{0: ProcessFailure(...)}`` as failures.
    """
    if child_error_file:
        env_override = {"TORCHELASTIC_ERROR_FILE": child_error_file}
        with mock.patch.dict(os.environ, env_override):
            handler = ErrorHandler()
            handler.record_exception(SentinelError("foobar"))

    failure = ProcessFailure(
        local_rank=0,
        pid=997,
        exitcode=1,
        error_file=child_error_file,
    )
    failures_by_rank = {0: failure}
    raise ChildFailedError(name, failures_by_rank)
def test_copy_error_file(self):
    """Copy a source error file with and without a destination configured.

    First copies with ``TORCHELASTIC_ERROR_FILE`` pointing at
    ``self.test_error_file`` and checks both files compare equal, then calls
    ``copy_error_file`` again with the variable unset.
    """
    src_error_file = os.path.join(self.test_dir, "src_error.json")
    _write_error(RuntimeError("foobar"), src_error_file)

    with patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": self.test_error_file}):
        eh = ErrorHandler()
        eh.copy_error_file(src_error_file)
        self.assertTrue(filecmp.cmp(src_error_file, self.test_error_file))

    with patch.dict(os.environ, {}):
        # Patching with an empty dict neither adds nor removes keys, so a
        # TORCHELASTIC_ERROR_FILE inherited from the outer environment would
        # still be set here. Remove it explicitly; patch.dict restores the
        # original environment when the block exits.
        os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
        eh = ErrorHandler()
        eh.copy_error_file(src_error_file)
def test_dump_error_file(self):
    """Dump a recorded error file with and without a destination configured.

    An exception is recorded into a source file, dumped to
    ``self.test_error_file`` (both files must compare equal), and finally
    dumped once more with ``TORCHELASTIC_ERROR_FILE`` unset.
    """
    src_error_file = os.path.join(self.test_dir, "src_error.json")
    eh = ErrorHandler()

    # Point the variable at the source path so record_exception writes there.
    with patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": src_error_file}):
        eh.record_exception(RuntimeError("foobar"))

    with patch.dict(os.environ, {"TORCHELASTIC_ERROR_FILE": self.test_error_file}):
        eh.dump_error_file(src_error_file)
        self.assertTrue(filecmp.cmp(src_error_file, self.test_error_file))

    with patch.dict(os.environ, {}):
        # Patching with an empty dict neither adds nor removes keys, so a
        # TORCHELASTIC_ERROR_FILE inherited from the outer environment would
        # still be set here. Remove it explicitly; patch.dict restores the
        # original environment when the block exits.
        os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
        eh.dump_error_file(src_error_file)
def test_record_exception(self):
    """Record a raised exception and check the fields of the written error file."""
    env_override = {"TORCHELASTIC_ERROR_FILE": self.test_error_file}
    with patch.dict(os.environ, env_override):
        handler = ErrorHandler()
        handler.initialize()

        try:
            raise_exception_fn()
        except Exception as exc:
            handler.record_exception(exc)

        with open(self.test_error_file, "r") as error_fp:
            recorded = json.load(error_fp)
            # Example of the error file content:
            # {
            #   "message": {
            #     "message": "RuntimeError: foobar",
            #     "extraInfo": {
            #       "py_callstack": "Traceback (most recent call last):\n <... OMITTED ...>",
            #       "timestamp": "1605774851"
            #     }
            #   }
            # }
            payload = recorded["message"]
            self.assertIsNotNone(payload["message"])
            extra_info = payload["extraInfo"]
            self.assertIsNotNone(extra_info["py_callstack"])
            self.assertIsNotNone(extra_info["timestamp"])
def test_record_exception_no_error_file(self):
    """Check that recording an exception does not fail without an error file.

    The test passes as long as ``record_exception`` returns without raising
    while ``TORCHELASTIC_ERROR_FILE`` is absent from the environment.
    """
    # make sure record does not fail when no error file is specified in env vars
    with patch.dict(os.environ, {}):
        # Patching with an empty dict neither adds nor removes keys, so the
        # variable is dropped explicitly in case the outer environment sets
        # it; patch.dict restores the original environment on exit.
        os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
        eh = ErrorHandler()
        eh.initialize()
        try:
            raise_exception_fn()
        except Exception as e:
            eh.record_exception(e)
def get_error_handler():
    """Create and return a new ``ErrorHandler`` instance."""
    handler = ErrorHandler()
    return handler
def test_initialize_error(self, fh_enable_mock):
    """Check that ``initialize`` copes with errors and still calls the mock once.

    NOTE(review): ``fh_enable_mock`` is presumably injected by a patch
    decorator that is not visible here -- confirm what it replaces and how
    it is configured to fail.
    """
    # makes sure that initialize handles errors gracefully
    handler = ErrorHandler()
    handler.initialize()
    fh_enable_mock.assert_called_once()
def test_initialize(self, fh_enable_mock):
    """Check that ``initialize`` calls the patched hook exactly once.

    NOTE(review): ``fh_enable_mock`` is presumably injected by a patch
    decorator that is not visible here -- confirm what it replaces.
    """
    handler = ErrorHandler()
    handler.initialize()
    fh_enable_mock.assert_called_once()