def step_with_fail(self):
    """Dispatch one step to every worker, killing the first worker early on.

    A step is launched on each entry of ``self.workers`` via
    ``step.remote()``. While ``self._num_failures`` is below 1, the first
    worker is then killed (after a one-second pause) so that the step
    already in flight is interrupted.

    Returns:
        A ``(success, worker_stats)`` tuple, where ``worker_stats`` is the
        list of values returned by the ``step.remote()`` calls and
        ``success`` is whatever ``check_for_failure`` reports for that list.
    """
    # Launch the step on all workers before injecting the failure; the
    # ordering (dispatch -> sleep -> kill -> check) is intentional.
    pending = []
    for worker in self.workers:
        pending.append(worker.step.remote())

    inject_failure = self._num_failures < 1
    if inject_failure:
        # Pause before killing the worker -- presumably so the dispatched
        # step is actually underway and the batch fails reliably.
        time.sleep(1)
        victim = self.workers[0]
        victim.__ray_kill__()

    return check_for_failure(pending), pending
def step_with_fail(self):
    """Dispatch one step to every worker, killing the first worker early on.

    A step is launched on each entry of ``self.workers`` via
    ``step.remote()``. While ``self._num_failures`` is below 2, the first
    worker is then killed (after a one-second pause) so that the step
    already in flight is interrupted.

    Returns:
        A ``(success, worker_stats)`` tuple, where ``worker_stats`` is the
        list of values returned by the ``step.remote()`` calls and
        ``success`` is whatever ``check_for_failure`` reports for that list.
    """
    # Launch the step on all workers before injecting the failure; the
    # ordering (dispatch -> sleep -> kill -> check) is intentional.
    in_flight = []
    for worker in self.workers:
        in_flight.append(worker.step.remote())

    still_failing = self._num_failures < 2
    if still_failing:
        # Pause before killing the worker -- presumably so the dispatched
        # step is actually underway when the worker goes away.
        time.sleep(1)
        first_worker = self.workers[0]
        first_worker.__ray_kill__()

    return check_for_failure(in_flight), in_flight
def _train_step(self):
    """Dispatch one step to every worker and check the outcome.

    Returns:
        A ``(success, worker_stats)`` tuple, where ``worker_stats`` is the
        list of values returned by calling ``step.remote()`` on each entry
        of ``self.workers`` and ``success`` is the result of
        ``utils.check_for_failure`` on that list.
    """
    step_results = []
    for worker in self.workers:
        step_results.append(worker.step.remote())

    # The check runs only after every worker has been dispatched.
    return utils.check_for_failure(step_results), step_results